Description of Data

The files are the miseq reads that have been processed to identify the insertion, deletion, wild type, point mutations in the reads. The size (score) of the indel is indicated as well. Wild type/point mutations have score=0; deletion<0, insertion>0. Point mutations have a changed nucleotide in the sgRNA target as compared to the wild type.

Import data

#libraries that are needed
library("plyr")
library(deSolve)
library(minpack.lm)
library(FME)
## Loading required package: rootSolve
## Loading required package: coda
library("Biostrings")
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, cbind, colnames,
##     do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, lengths, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff,
##     sort, table, tapply, union, unique, unsplit, which, which.max,
##     which.min
## Loading required package: S4Vectors
## Loading required package: stats4
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:plyr':
## 
##     rename
## The following objects are masked from 'package:base':
## 
##     colMeans, colSums, expand.grid, rowMeans, rowSums
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:plyr':
## 
##     desc
## Loading required package: XVector
## 
## Attaching package: 'XVector'
## The following object is masked from 'package:plyr':
## 
##     compact
library("seqLogo")
## Loading required package: grid
#variables
time_all <- c(0, 4, 8, 10, 12, 14, 16, 18, 21, 24, 33, 37, 38, 42, 48, 54, 60) #time point + shield
time_c_all <- c(0,8,16,18,24,60,70) #time points -shield (t=70 is to plot -guide sample)

20151216 Run 3756

index LBR2 T1 LBR2 C T1,T2 LBR2 T2 LBR2 T3 LBR2 C T3 +/-NU7441 LBR2 T3 + NU7441 LBR2 T4 LBR2 C T4 +/-NU7441 LBR2 T4 + NU7441
A B C D E F K L M
1 T1 t=4 +shield T1 t=0 -shield T2 t=4 +shield T3 t=4 +shield T3 t=0 -shield T3N t=4 +shield T4 t=4 +shield T4 t=0 -shield T4N t=4 +shield
2 T1 t=8 +shield T1 t=8 -shield T2 t=8 +shield T3 t=8 +shield T3 t=8 -shield T3N t=8 +shield T4 t=8 +shield T4 t=8 -shield T4N t=8 +shield
3 T1 t=10 +shield T1 t=18 -shield T2 t=10 +shield T3 t=10 +shield T3 t=16 -shield T3N t=10 +shield T4 t=10 +shield T4 t=16 -shield T4N t=10 +shield
4 T1 t=12 +shield T2 t=12 +shield T3 t=12 +shield T3 t=24 -shield T3N t=12 +shield T4 t=12 +shield T4 t=24 -shield T4N t=12 +shield
5 T1 t=14 +shield T1 t=60 -shield T2 t=14 +shield T3 t=14 +shield T3 t=60 -shield T3N t=14 +shield T4 t=14 +shield T4 t=60 -shield T4N t=14 +shield
6 T1 t=16 +shield T1 -guide +shield T2 t=16 +shield T3 t=16 +shield T3 -guide +shield T3N t=16 +shield T4 t=16 +shield T4 -guide +shield T4N t=16 +shield
7 T1 t=18 +shield T2 t=0 -shield T2 t=18 +shield T3 t=18 +shield T3N t=60 -shield T3N t=18 +shield T4 t=18 +shield T4N t=0 -shield T4N t=18 +shield
8 T1 t=21 +shield T2 t=8 -shield T2 t=21 +shield T3 t=24 +shield T3N -guide +shield T3N t=24 +shield T4 t=21 +shield T4N t=8 -shield T4N t=21 +shield
9 T1 t=24 +shield T2 t=16 -shield T2 t=24 +shield T3 t=33 +shield T3N t=33 +shield T4 t=24 +shield T4N t=16 -shield T4N t=24 +shield
10 T1 t=33 +shield T2 t=24 -shield T2 t=33 +shield T3 t=37 +shield T3N t=37 +shield T4 t=33 +shield T4N t=24 -shield T4N t=33 +shield
11 T1 t=42 +shield T2 t=60 -shield T2 t=42 +shield T3 t=42 +shield T3N t=42 +shield T4 t=42 +shield T4N t=60 -shield T4N t=42 +shield
12 T1 =60 +shield T2 -guide +shield T2 =60 +shield T3 =60 +shield T3N t=60 +shield T4 t=60 +shield T4N -guide +shield T4N t=60 +shield

20160624 Run 4033

index AAVS1 T6 AAVS1 C T6, T8 AAVS1 T8 chr11 T6 chr11 C T6,T8 chr11 T8 LBR2 T7 LBR2 T7 C LBR2 T7 + NU7441
A B C E F G I J K
1 T6 t=4 +shield T6 t=0 -shield T8 t=4 +shield T6 t=4 +shield T6 t=0 -shield T8 t=4 +shield T7 t=4 +shield T7 t=0 -shield T7N t=4 +shield
2 T6 t=8 +shield T6 t=8 -shield T8 t=8 +shield T6 t=8 +shield T6 t=8 -shield T8 t=8 +shield T7 t=8 +shield T7 t=8 -shield T7N t=8 +shield
3 T6 t=10 +shield T8 t=10 +shield T6 t=10 +shield T6 t=16 -shield T8 t=10 +shield T7 t=10 +shield T7 t=16 -shield T7N t=10 +shield
4 T6 t=12 +shield T6 t=24 -shield T8 t=12 +shield T6 t=12 +shield T6 t=24 -shield T8 t=12 +shield T7 t=12 +shield T7 t=24 -shield T7N t=12 +shield
5 T6 t=14 +shield T6 t=60 -shield T8 t=14 +shield T6 t=14 +shield T6 t=60 -shield T8 t=14 +shield T7 t=14 +shield T7 t=60 -shield T7N t=14 +shield
6 T6 t=16 +shield T8 t=16 +shield T6 t=16 +shield T6 -guide +shield T8 t=16 +shield T7 t=16 +shield T7 -guide +shield T7N t=16 +shield
7 T6 t=18 +shield T8 t=0 -shield T8 t=18 +shield T6 t=18 +shield T8 t=0 -shield T8 t=18 +shield T7 t=18 +shield T7N t=0 -shield T7N t=18 +shield
8 T6 t=21 +shield T8 t=8 -shield T8 t=21 +shield T6 t=21 +shield T8 t=8 -shield T8 t=21 +shield T7 t=21 +shield T7N t=8 -shield T7N t=21 +shield
9 T6 t=24 +shield T8 t=16 -shield T8 t=24 +shield T6 t=24 +shield T8 t=16 -shield T8 t=24 +shield T7 t=24 +shield T7N t=16 -shield T7N t=24 +shield
10 T6 t=33 +shield T8 t=24 -shield T8 t=33 +shield T6 t=33 +shield T8 t=24 -shield T8 t=33 +shield T7 t=33 +shield T7N t=24 -shield T7N t=33 +shield
11 T6 t=42 +shield T8 t=60 -shield T8 t=42 +shield T6 t=42 +shield T8 t=60 -shield T8 t=42 +shield T7 t=42 +shield T7N t=60 -shield T7N t=42 +shield
12 T6 t=60 +shield T8 -guide +shield T8 =60 +shield T6 =60 +shield T8 -guide +shield T8 t=60 +shield T7 t=60 +shield T7N -guide +shield T7N t=60 +shield

20161025 Run 4211

index LBR2 T9 LBR2 T9 + NU7441 LBR2 T9 C +/-NU7441
A B C
1 T9 t=4 +shield T9N t=4 +shield T9 t=0 -shield
2 T9 t=8 +shield T9N t=8 +shield T9 t=8 -shield
3 T9 t=10 +shield T9N t=10 +shield T9 t=16 -shield
4 T9 t=12 +shield T9N t=12 +shield T9 t=24 -shield
5 T9 t=14 +shield T9N t=14 +shield T9 t=60 -shield
6 T9 t=16 +shield T9N t=16 +shield T9 -guide +shield
7 T9 t=18 +shield T9N t=18 +shield T9N t=0 -shield
8 T9 t=21 +shield T9N t=21 +shield T9N t=8 -shield
9 T9 t=24 +shield T9N t=24 +shield T9N t=16 -shield
10 T9 t=33 +shield T9N t=33 +shield T9N t=24 -shield
11 T9 t=42 +shield T9N t=42 +shield T9N t=60 -shield
12 T9 t=60 +shield T9N t=60 +shield T9N -guide +shield

20170102 Run 4265

index LBR2 T10 LBR2 T10 C LBR8 T10 LBR2&8 T10 LBR8_2&8 T10 C
A B C D E
1 T10 t=4 +shield T10 t=0 -shield T10 t=4 +shield T10 t=4 +shield T10_8 t=0 -shield
2 T10 t=8 +shield T10 t=8 -shield T10 t=8 +shield T10 t=8 +shield T10_8 t=8 -shield
3 T10 t=10 +shield T10 t=16 -shield T10 t=10 +shield T10 t=10 +shield T10_8 t=16 -shield
4 T10 t=12 +shield T10 t=24 -shield T10 t=12 +shield T10 t=12 +shield T10_8 t=24 -shield
5 T10 t=14 +shield T10 t=60 +shield T10 t=14 +shield T10 t=14 +shield T10_8 t=60 -shield
6 T10 t=16 +shield T10 -guide +shield T10 t=16 +shield T10 t=16 +shield T10 -guide +shield
7 T10 t=18 +shield T10 -guide -shield T10 t=18 +shield T10 t=18 +shield T10_2&8 t=0 -shield
8 T10 t=21 +shield T10 t=21 +shield T10 t=21 +shield T10_2&8 t=8 -shield
9 T10 t=24 +shield T10 t=24 +shield T10 t=24 +shield T10_2&8 t=16 -shield
10 T10 t=33 +shield T10 t=33 +shield T10 t=33 +shield T10_2&8 t=24 -shield
11 T10 t=42 +shield T10 t=42 +shield T10 t=42 +shield T10_2&8 t=60 -shield
12 T10 t=60 +shield T10 t=60 +shield T10 t=60 +shield T10 -guide +shield

20170912 Run 4604

index AAVS1 T11 AAVS1 T11 C chr11 T11 chr11 T11 C LBR8 T11 LBR8 T11 C LBR8 T12 LBR2_8 T12 LBR8_2 T12 C LBR2 T12 LBR2 T12 c
A B C D E F G H I J K
1 T11 t=4 +shield T11 t=0 -shield T11 t=4 +shield T11 t=0 -shield T11 t=4 +shield T11 t=0 -shield T12 t=4 +shield T12 t=4 +shield T12_8 t=0 -shield T12 t=4 +shield T12 t=0 -shield
2 T11 t=8 +shield T11 t=8 -shield T11 t=8 +shield T11 t=8 -shield T11 t=8 +shield T11 t=8 -shield T12 t=8 +shield T12 t=8 +shield T12_8 t=8 -shield T12 t=8 +shield T12 t=8 -shield
3 T11 t=10 +shield T11 t=16 -shield T11 t=10 +shield T11 t=16 -shield T11 t=10 +shield T11 t=16 -shield T12 t=10 +shield T12 t=10 +shield T12_8 t=16 -shield T12 t=10 +shield T12 t=16 -shield
4 T11 t=12 +shield T11 t=24 -shield T11 t=12 +shield T11 t=24 -shield T11 t=12 +shield T11 t=24 -shield T12 t=10 +shield T12 t=12 +shield T12_8 t=24 -shield T12 t=12 +shield T12 t=24 -shield
5 T11 t=14 +shield T11 t=60 -shield T11 t=14 +shield T11 t=60 -shield T11 t=14 +shield T11 t=60 -shield T12 t=14 +shield T12 t=14 +shield T12_8 t=60 -shield T12 t=14 +shield T12 t=60 -shield
6 T11 t=16 +shield T11 -guide +shield T11 t=16 +shield T11 -guide +shield T11 t=16 +shield T11 -guide +shield T12 t=16 +shield T12 t=16 +shield T12 -guide +shield T12 t=16 +shield T12 -guide +shield
7 T11 t=18 +shield T11 T=87 +shield T11 t=18 +shield T11 T=87 +shield T11 t=18 +shield T11 T=87 +shield T12 t=18 +shield T12 t=18 +shield T12_2_8 t=0 -shield T12 t=18 +shield T12 -guide -shield
8 T11 t=21 +shield T11 t=21 +shield T11 t=21 +shield T12 t=21 +shield T12 t=21 +shield T12_2_8 t=8 -shield T12 t=21 +shield
9 T11 t=24 +shield T11 t=24 +shield T11 t=24 +shield T12 t=24 +shield T12 t=24 +shield T12_2_8 t=16 -shield T12 t=24 +shield
10 T11 t=33 +shield T11 t=33 +shield T11 t=33 +shield T12 t=33 +shield T12 t=33 +shield T12_2_8 t=24 -shield T12 t=33 +shield
11 T11 t=42 +shield T11 t=42 +shield T11 t=42 +shield T12 t=42 +shield T12 t=42 +shield T12_2_8 t=60 -shield T12 t=42 +shield
12 T11 t=60 +shield T11 t=60 +shield T11 t=60 +shield T12 t=60 +shield T12 t=60 +shield T12 -guide -shield T12 t=60 +shield

20171207 Run 4718

index AAVS1 T13 AAVS1 T13.2 AAVS1 T13_T13.3 C AAVS1 T14 AAVS1 T14.2 AAVS1 T14_T14.3 C AAVS1 T14 C LBR8 T13 LBR8 T13.2 LBR8 T13_T13.2 C LBR8 T14 LBR8 T14 C chr11 T14 chr11 T14.2 chr11 T14_T14.2 C chr11 T14 C
A B C D E F G H I J K L M N O P
1 T13 t=4 +shield T13.2 t=4 +shield T13 t=54 +shield T14 t=4 +shield T14.2 t=4 +shield T14 t=48 +shield T14 -guide +shield T13 t=4 +shield T13.2 t=4 +shield T13 t=54 +shield T14 t=4 +shield T14 t=48 +shield T14 t=4 +shield T14.2 t=4 +shield T14 t=48 +shield
2 T13 t=8 +shield T13.2 t=8 +shield T13 t=60 +shield T14 t=8 +shield T14.2 t=8 +shield T14 t=54 +shield T14 -guide -shield T13 t=8 +shield T13.2 t=8 +shield T13 t=60 +shield T14 t=8 +shield T14 t=54 +shield T14 t=8 +shield T14.2 t=8 +shield T14 t=54 +shield
3 T13 t=10 +shield T13.2 t=10 +shield T13 t=0 -shield T14 t=10 +shield T14.2 t=10 +shield T14 t=60 +shield T13 t=10 +shield T13.2 t=10 +shield T13 t=0 -shield T14 t=10 +shield T14 t=60 +shield T14 t=10 +shield T14.2 t=10 +shield T14 t=60 +shield T14 -guide +shield
4 T13 t=12 +shield T13.2 t=12 +shield T13 t=24 -shield T14 t=12 +shield T14.2 t=12 +shield T14 t=0 -shield T13 t=12 +shield T13.2 t=12 +shield T13 t=24 -shield T14 t=12 +shield T14 t=0 -shield T14 t=12 +shield T14.2 t=12 +shield T14 t=0 -shield T14 -guide -shield
5 T13 t=14 +shield T13.2 t=14 +shield T13 t=60 -shield T14 t=14 +shield T14.2 t=14 +shield T14 t=24 -shield T13 t=14 +shield T13.2 t=14 +shield T13 t=60 -shield T14 t=14 +shield T14 t=24 -shield T14 t=14 +shield T14.2 t=14 +shield T14 t=24 -shield
6 T13 t=16 +shield T13.2 t=16 +shield T13 -guide +shield T14 t=16 +shield T14.2 t=16 +shield T14 t=60 -shield T13 t=16 +shield T13.2 t=16 +shield T13 -guide +shield T14 t=16 +shield T14 t=60 -shield T14 t=16 +shield T14.2 t=16 +shield T14 t=60 -shield
7 T13 t=18 +shield T13.2 t=18 +shield T13.2 t=54 +shield T14 t=18 +shield T14.2 t=18 +shield T14.2 t=48 +shield T13 t=18 +shield T13.2 t=18 +shield T13.2 t=54 +shield T14 t=18 -shield T14 -guide +shield T14 t=18 +shield T14.2 t=18 +shield T14.2 t=48 +shield
8 T13 t=21 +shield T13.2 t=21 +shield T13.2 t=60 +shield T14 t=21 +shield T14.2 t=21 +shield T14.2 t=54 +shield T13 t=21 +shield T13.2 t=21 +shield T13.2 t=60 +shield T14 t=21 +shield T14 t=21 +shield T14.2 t=21 +shield T14.2 t=54 +shield
9 T13 t=24 +shield T13.2 t=24 +shield T13.2 t=0 -shield T14 t=24 +shield T14.2 t=24 +shield T14.2 t=60 +shield T13 t=24 +shield T13.2 t=24 +shield T13.2 t=0 -shield T14 t=24 +shield T14 t=24 +shield T14.2 t=24 +shield T14.2 t=60 +shield
10 T13 t=33 +shield T13.2 t=33 +shield T13.2 t=24 -shield T14 t=33 +shield T14.2 t=33 +shield T14.2 t=0 -shield T13 t=33 +shield T13.2 t=33 +shield T13.2 t=24 -shield T14 t=33 +shield T14 t=33 +shield T14.2 t=33 +shield T14.2 t=0 +shield
11 T13 t=42 +shield T13.2 t=42 +shield T13.2 t=60 -shield T14 t=38 +shield T14.2 t=38 +shield T14.2 t=24 -shield T13 t=42 +shield T13.2 t=42 +shield T13.2 t=60 -shield T14 t=38 +shield T14 t=38 +shield T14.2 t=38 +shield T14.2 t=24 +shield
12 T13 t=48 +shield T13.2 t=48 +shield T13.2 -guide -shield T14 t=42 +shield T14.2 t=42 +shield T14.2 t=60 -shield T13 t=48 +shield T13.2 t=48 +shield T13.2 -guide -shield T14 t=42 +shield T14 t=42 +shield T14.2 t=42 +shield T14.2 t=60 +shield

Import the sum of all types of identified mutations

The sum of insertions, deletions, point mutations, wild type per sample are calculated.

One sample is one time point in one time series

#each sample has one index for timeseries id and one index to identify the time point.

samples <- dir()
IdxA_3756_s <- grep('Idx-A', grep('sum', grep('3756', samples, value=TRUE), value=TRUE), value=TRUE) 
IdxB_3756_s <- grep('Idx-B', grep('sum', grep('3756', samples, value=TRUE), value=TRUE), value=TRUE) 
IdxC_3756_s <- grep('Idx-C', grep('sum', grep('3756', samples, value=TRUE), value=TRUE), value=TRUE) 
IdxD_3756_s <- grep('Idx-D', grep('sum', grep('3756', samples, value=TRUE), value=TRUE), value=TRUE) 
IdxE_3756_s <- grep('Idx-E', grep('sum', grep('3756', samples, value=TRUE), value=TRUE), value=TRUE) 
IdxF_3756_s <- grep('Idx-F', grep('sum', grep('3756', samples, value=TRUE), value=TRUE), value=TRUE) 
IdxK_3756_s <- grep('Idx-K', grep('sum', grep('3756', samples, value=TRUE), value=TRUE), value=TRUE) 
IdxL_3756_s <- grep('Idx-L', grep('sum', grep('3756', samples, value=TRUE), value=TRUE), value=TRUE) 
IdxM_3756_s <- grep('Idx-M', grep('sum', grep('3756', samples, value=TRUE), value=TRUE), value=TRUE) 

IdxA_4033_s <- grep('Idx-A', grep('sum', grep('4033', samples, value=TRUE), value=TRUE), value=TRUE) 
IdxB_4033_s <- grep('Idx-B', grep('sum', grep('4033', samples, value=TRUE), value=TRUE), value=TRUE) 
IdxC_4033_s <- grep('Idx-C', grep('sum', grep('4033', samples, value=TRUE), value=TRUE), value=TRUE) 
IdxE_4033_s <- grep('Idx-E', grep('sum', grep('4033', samples, value=TRUE), value=TRUE), value=TRUE) 
IdxF_4033_s <- grep('Idx-F', grep('sum', grep('4033', samples, value=TRUE), value=TRUE), value=TRUE) 
IdxG_4033_s <- grep('Idx-G', grep('sum', grep('4033', samples, value=TRUE), value=TRUE), value=TRUE) 
IdxI_4033_s <- grep('Idx-I', grep('sum', grep('4033', samples, value=TRUE), value=TRUE), value=TRUE) 
IdxJ_4033_s <- grep('Idx-J', grep('sum', grep('4033', samples, value=TRUE), value=TRUE), value=TRUE) 
IdxK_4033_s <- grep('Idx-K', grep('sum', grep('4033', samples, value=TRUE), value=TRUE), value=TRUE) 

IdxA_4211_s <- grep('Idx-A', grep('sum', grep('4211', samples, value=TRUE), value=TRUE), value=TRUE) 
IdxB_4211_s <- grep('Idx-B', grep('sum', grep('4211', samples, value=TRUE), value=TRUE), value=TRUE) 
IdxC_4211_s <- grep('Idx-C', grep('sum', grep('4211', samples, value=TRUE), value=TRUE), value=TRUE) 

IdxA_4265_s <- grep('Idx-A', grep('sum', grep('4265', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE)
IdxB_4265_s <- grep('Idx-B', grep('sum', grep('4265', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE)
IdxC_4265_s <- grep('Idx-C', grep('sum', grep('4265', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE)
IdxD_4265_s <- grep('Idx-D', grep('sum', grep('4265', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE)
IdxE_4265_s <- grep('Idx-E', grep('sum_output_', grep('4265', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE)
IdxE8_4265_s <- grep('Idx-E', grep('sum_output8_', grep('4265', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE)

IdxA_4604_s <- grep('Idx-A', grep('sum', grep('4604', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE),  value=TRUE)
IdxB_4604_s <- grep('Idx-B', grep('sum', grep('4604', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE),  value=TRUE) 
IdxC_4604_s <- grep('Idx-C', grep('sum', grep('4604', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE) 
IdxD_4604_s <- grep('Idx-D', grep('sum', grep('4604', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE) 
IdxE_4604_s <- grep('Idx-E', grep('sum', grep('4604', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE) 
IdxF_4604_s <- grep('Idx-F', grep('sum', grep('4604', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE) 
IdxG_4604_s <- grep('Idx-G', grep('sum', grep('4604', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE)
IdxI_4604_s <- grep('Idx-I', grep('sum_output_', grep('4604', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE)
IdxI8_4604_s <- grep('Idx-I', grep('sum_output8_', grep('4604', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE)
IdxH_4604_s <- grep('Idx-H', grep('sum_output_', grep('4604', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE) 
IdxJ_4604_s <- grep('Idx-J', grep('sum', grep('4604', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE) 
IdxK_4604_s <- grep('Idx-K', grep('sum', grep('4604', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE) 

IdxA_4718_s <- grep('Idx-A', grep('sum', grep('4718', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE),  value=TRUE)
IdxB_4718_s <- grep('Idx-B', grep('sum', grep('4718', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE),  value=TRUE) 
IdxC_4718_s <- grep('Idx-C', grep('sum', grep('4718', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE) 
IdxD_4718_s <- grep('Idx-D', grep('sum', grep('4718', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE) 
IdxE_4718_s <- grep('Idx-E', grep('sum', grep('4718', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE) 
IdxF_4718_s <- grep('Idx-F', grep('sum', grep('4718', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE) 
IdxG_4718_s <- grep('Idx-G', grep('sum', grep('4718', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE)
IdxH_4718_s <- grep('Idx-H', grep('sum', grep('4718', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE)
IdxI_4718_s <- grep('Idx-I', grep('sum', grep('4718', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE)
IdxI_4718_s <- grep('Idx-I', grep('sum', grep('4718', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE)
IdxJ_4718_s <- grep('Idx-J', grep('sum', grep('4718', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE) 
IdxK_4718_s <- grep('Idx-K', grep('sum', grep('4718', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE) 
IdxL_4718_s <- grep('Idx-L', grep('sum', grep('4718', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE)
IdxM_4718_s <- grep('Idx-M', grep('sum', grep('4718', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE)
IdxN_4718_s <- grep('Idx-N', grep('sum', grep('4718', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE)
IdxO_4718_s <- grep('Idx-O', grep('sum', grep('4718', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE)
IdxP_4718_s <- grep('Idx-P', grep('sum', grep('4718', grep('R1', samples, value=TRUE), value=TRUE), value=TRUE), value=TRUE)


#sort the time points in the correct order
ordF = function(x){
  x[order(sapply(x,function(x){
    as.numeric(strsplit(strsplit(x,'_')[[1]][3],'-')[[1]][2])}
    ))]
  }

IdxA_3756_s = ordF(IdxA_3756_s);
IdxB_3756_s = ordF(IdxB_3756_s);
IdxC_3756_s = ordF(IdxC_3756_s);
IdxD_3756_s = ordF(IdxD_3756_s);
IdxE_3756_s = ordF(IdxE_3756_s);
IdxF_3756_s = ordF(IdxF_3756_s);
IdxK_3756_s = ordF(IdxK_3756_s);
IdxL_3756_s = ordF(IdxL_3756_s);
IdxM_3756_s = ordF(IdxM_3756_s);

IdxA_4033_s = ordF(IdxA_4033_s);
IdxB_4033_s = ordF(IdxB_4033_s);
IdxC_4033_s = ordF(IdxC_4033_s);
IdxE_4033_s = ordF(IdxE_4033_s);
IdxF_4033_s = ordF(IdxF_4033_s);
IdxG_4033_s = ordF(IdxG_4033_s);
IdxI_4033_s = ordF(IdxI_4033_s);
IdxJ_4033_s = ordF(IdxJ_4033_s);
IdxK_4033_s = ordF(IdxK_4033_s);

IdxA_4211_s = ordF(IdxA_4211_s);
IdxB_4211_s = ordF(IdxB_4211_s);
IdxC_4211_s = ordF(IdxC_4211_s);

IdxA_4265_s = ordF(IdxA_4265_s);
IdxB_4265_s = ordF(IdxB_4265_s);
IdxC_4265_s = ordF(IdxC_4265_s);
IdxE8_4265_s = ordF(IdxE8_4265_s);

IdxA_4604_s = ordF(IdxA_4604_s);
IdxB_4604_s = ordF(IdxB_4604_s);
IdxC_4604_s = ordF(IdxC_4604_s);
IdxD_4604_s = ordF(IdxD_4604_s);
IdxE_4604_s = ordF(IdxE_4604_s);
IdxF_4604_s = ordF(IdxF_4604_s);
IdxG_4604_s = ordF(IdxG_4604_s);
IdxI8_4604_s = ordF(IdxI8_4604_s);
IdxJ_4604_s = ordF(IdxJ_4604_s);
IdxK_4604_s = ordF(IdxK_4604_s);

IdxA_4718_s = ordF(IdxA_4718_s);
IdxB_4718_s = ordF(IdxB_4718_s);
IdxC_4718_s = ordF(IdxC_4718_s);
IdxD_4718_s = ordF(IdxD_4718_s);
IdxE_4718_s = ordF(IdxE_4718_s);
IdxF_4718_s = ordF(IdxF_4718_s);
IdxG_4718_s = ordF(IdxG_4718_s);
IdxH_4718_s = ordF(IdxH_4718_s);
IdxI_4718_s = ordF(IdxI_4718_s);
IdxJ_4718_s = ordF(IdxJ_4718_s);
IdxK_4718_s = ordF(IdxK_4718_s);
IdxL_4718_s = ordF(IdxL_4718_s);
IdxM_4718_s = ordF(IdxM_4718_s);
IdxN_4718_s = ordF(IdxN_4718_s);
IdxO_4718_s = ordF(IdxO_4718_s);
IdxP_4718_s = ordF(IdxP_4718_s);

#import the files
t1      <- lapply(IdxA_3756_s, read.table, header = FALSE)
t1_2_c  <- lapply(IdxB_3756_s, read.table, header = FALSE)
t2      <- lapply(IdxC_3756_s, read.table, header = FALSE)
t3      <- lapply(IdxD_3756_s, read.table, header = FALSE)
t3_c    <- lapply(IdxE_3756_s, read.table, header = FALSE)
t3_N    <- lapply(IdxF_3756_s, read.table, header = FALSE)
t4      <- lapply(IdxK_3756_s, read.table, header = FALSE)
t4_N_c  <- lapply(IdxL_3756_s, read.table, header = FALSE)
t4_N    <- lapply(IdxM_3756_s, read.table, header = FALSE)

t6_A    <- lapply(IdxA_4033_s, read.table, header = FALSE)
t6_8_A_c<- lapply(IdxB_4033_s, read.table, header = FALSE)
t8_A    <- lapply(IdxC_4033_s, read.table, header = FALSE)
t6_11   <- lapply(IdxE_4033_s, read.table, header = FALSE)
t6_8_11_c<-lapply(IdxF_4033_s, read.table, header = FALSE)
t8_11   <- lapply(IdxG_4033_s, read.table, header = FALSE)
t7      <- lapply(IdxI_4033_s, read.table, header = FALSE)
t7_c    <- lapply(IdxJ_4033_s, read.table, header = FALSE)
t7_N    <- lapply(IdxK_4033_s, read.table, header = FALSE)

t9      <- lapply(IdxA_4211_s, read.table, header = FALSE)
t9_N    <- lapply(IdxB_4211_s, read.table, header = FALSE)
t9_c    <- lapply(IdxC_4211_s, read.table, header = FALSE)

t10_2    <- lapply(IdxA_4265_s, read.table, header = FALSE)
t10_2_c  <- lapply(IdxB_4265_s, read.table, header = FALSE)
t10_8    <- lapply(IdxC_4265_s, read.table, header = FALSE)
t10_8_c  <- lapply(IdxE_4265_s, read.table, header = FALSE)

t11_A    <- lapply(IdxA_4604_s, read.table, header = FALSE)
t11_A_c  <- lapply(IdxB_4604_s, read.table, header = FALSE)
t11_11   <- lapply(IdxC_4604_s, read.table, header = FALSE)
t11_11_c <- lapply(IdxD_4604_s, read.table, header = FALSE)
t11_8    <- lapply(IdxE_4604_s, read.table, header = FALSE)
t11_8_c  <- lapply(IdxF_4604_s, read.table, header = FALSE)
t12_2    <- lapply(IdxJ_4604_s, read.table, header = FALSE)
t12_2_c  <- lapply(IdxK_4604_s, read.table, header = FALSE)
t12_8    <- lapply(IdxG_4604_s, read.table, header = FALSE)
t12_8_c  <- lapply(IdxI8_4604_s, read.table, header = FALSE)

t13_A    <- lapply(IdxA_4718_s, read.table, header = FALSE)
t13.2_A  <- lapply(IdxB_4718_s, read.table, header = FALSE)
t13_A_c  <- lapply(IdxC_4718_s, read.table, header = FALSE)
t14_A    <- lapply(IdxD_4718_s, read.table, header = FALSE)
t14.2_A  <- lapply(IdxE_4718_s, read.table, header = FALSE)
t14_A_c  <- lapply(IdxF_4718_s, read.table, header = FALSE)
t14_A_c2 <- lapply(IdxG_4718_s, read.table, header = FALSE)
t13_8    <- lapply(IdxH_4718_s, read.table, header = FALSE)
t13.2_8  <- lapply(IdxI_4718_s, read.table, header = FALSE)
t13_8_c  <- lapply(IdxJ_4718_s, read.table, header = FALSE)
t14_8    <- lapply(IdxK_4718_s, read.table, header = FALSE)
t14_8_c  <- lapply(IdxL_4718_s, read.table, header = FALSE)
t14_11   <- lapply(IdxM_4718_s, read.table, header = FALSE)
t14.2_11 <- lapply(IdxN_4718_s, read.table, header = FALSE)
t14_11_c <- lapply(IdxO_4718_s, read.table, header = FALSE)
t14_11_c2<- lapply(IdxP_4718_s, read.table, header = FALSE)

#make a list to group all the timeseries 
Y <- list(t1, 
          t1_2_c, 
          t2, 
          t3, 
          t3_c, 
          t3_N,
          t4, 
          t4_N_c, 
          t4_N,
          t6_A, 
          t6_8_A_c,
          t8_A,   
          t6_11, 
          t6_8_11_c,
          t8_11,  
          t7,    
          t7_c,   
          t7_N,
          t9,
          t9_N,
          t9_c,
          t10_2,
          t10_2_c,
          t10_8,
          t10_8_c,
          t11_A,
          t11_A_c,
          t11_11, 
          t11_11_c, 
          t11_8,
          t11_8_c,
          t12_2,
          t12_2_c,
          t12_8,
          t12_8_c,
          t13_A,
          t13.2_A,
          t13_A_c,
          t14_A,
          t14.2_A,
          t14_A_c,
          t14_A_c2,
          t13_8,
          t13.2_8,
          t13_8_c,
          t14_8,
          t14_8_c,
          t14_11,
          t14.2_11,
          t14_11_c,
          t14_11_c2)

#idx names of the various miseq runs
names_samples <- c("1A","1B", "1C", "1D", "1E", "1F", "1K", "1L", "1M", "2A","2B", "2C", "2E", "2F", "2G", "2I", "2J", "2K", "3A","3B", "3C", "4A", "4B", "4C", "4E", "5A" , "5B", "5C", "5D", "5E", "5F", "5J", "5K", "5G","5H8", "6A", "6B","6C","6D","6E","6F","6G","6H","6I","6J","6K","6L","6M","6N","6O","6P")

names(Y)<- names_samples

# Calculate total amount of reads per sample
df3 <- NA
df3b <- NA
for (j in 1:length(Y)){
  df3 <- NA
  for (i in 1:length(Y[[j]])){
    df3[i] <- (Y[[j]][[i]][1])
  }
  df3b <- NA
  df3b <- lapply(df3, as.numeric)  
  if(j==1){t1_sum       <- sapply(df3b, sum)}
  if(j==2){t1_2_c_sum   <- sapply(df3b, sum)}
  if(j==3){t2_sum       <- sapply(df3b, sum)}
  if(j==4){t3_sum       <- sapply(df3b, sum)}
  if(j==5){t3_c_sum     <- sapply(df3b, sum)}
  if(j==6){t3_N_sum     <- sapply(df3b, sum)}
  if(j==7){t4_sum       <- sapply(df3b, sum)}  
  if(j==8){t4_N_c_sum   <- sapply(df3b, sum)} 
  if(j==9){t4_N_sum     <- sapply(df3b, sum)}
  if(j==10){t6_A_sum    <- sapply(df3b, sum)}
  if(j==11){t6_8_A_c_sum<- sapply(df3b, sum)}
  if(j==12){t8_A_sum    <- sapply(df3b, sum)}
  if(j==13){t6_11_sum   <- sapply(df3b, sum)}
  if(j==14){t6_8_11_c_sum<- sapply(df3b, sum)}
  if(j==15){t8_11_sum   <- sapply(df3b, sum)}
  if(j==16){t7_sum      <- sapply(df3b, sum)}
  if(j==17){t7_c_sum    <- sapply(df3b, sum)}
  if(j==18){t7_N_sum    <- sapply(df3b, sum)}
  if(j==19){t9_sum      <- sapply(df3b, sum)}
  if(j==20){t9_N_sum    <- sapply(df3b, sum)}
  if(j==21){t9_c_sum    <- sapply(df3b, sum)} 
  if(j==22){t10_2_sum   <- sapply(df3b, sum)}
  if(j==23){t10_2_c_sum <- sapply(df3b, sum)}
  if(j==24){t10_8_sum   <- sapply(df3b, sum)}
  if(j==25){t10_8_c_sum <- sapply(df3b, sum)}
  if(j==26){t11_A_sum   <- sapply(df3b, sum)}
  if(j==27){t11_A_c_sum <- sapply(df3b, sum)}
  if(j==28){t11_11_sum  <- sapply(df3b, sum)}
  if(j==29){t11_11_c_sum<- sapply(df3b, sum)}
  if(j==30){t11_8_sum   <- sapply(df3b, sum)}
  if(j==31){t11_8_c_sum <- sapply(df3b, sum)}
  if(j==32){t12_2_sum   <- sapply(df3b, sum)}
  if(j==33){t12_2_c_sum <- sapply(df3b, sum)}
  if(j==34){t12_8_sum   <- sapply(df3b, sum)}
  if(j==35){t12_8_c_sum <- sapply(df3b, sum)}
  if(j==36){t13_A_sum   <- sapply(df3b, sum)}
  if(j==37){t13.2_A_sum <- sapply(df3b, sum)}
  if(j==38){t13_A_c_sum <- sapply(df3b, sum)}
  if(j==39){t14_A_sum   <- sapply(df3b, sum)}
  if(j==40){t14.2_A_sum <- sapply(df3b, sum)}
  if(j==41){t14_A_c_sum <- sapply(df3b, sum)}
  if(j==42){t14_A_c2_sum<- sapply(df3b, sum)}
  if(j==43){t13_8_sum   <- sapply(df3b, sum)}
  if(j==44){t13.2_8_sum <- sapply(df3b, sum)}
  if(j==45){t13_8_c_sum <- sapply(df3b, sum)}
  if(j==46){t14_8_sum   <- sapply(df3b, sum)}
  if(j==47){t14_8_c_sum <- sapply(df3b, sum)}
  if(j==48){t14_11_sum  <- sapply(df3b, sum)}
  if(j==49){t14.2_11_sum<- sapply(df3b, sum)}
  if(j==50){t14_11_c_sum<- sapply(df3b, sum)}
  if(j==51){t14_11_c2_sum <-sapply(df3b, sum)}
}

#make a list to group all the timeseries 
Z <- list(t1_sum, 
          t1_2_c_sum, 
          t2_sum, 
          t3_sum, 
          t3_c_sum, 
          t3_N_sum, 
          t4_sum, 
          t4_N_c_sum, 
          t4_N_sum,
          t6_A_sum, 
          t6_8_A_c_sum,
          t8_A_sum,   
          t6_11_sum, 
          t6_8_11_c_sum,
          t8_11_sum,  
          t7_sum,    
          t7_c_sum,   
          t7_N_sum,
          t9_sum,
          t9_N_sum,
          t9_c_sum,
          t10_2_sum,
          t10_2_c_sum,
          t10_8_sum,
          t10_8_c_sum,
          t11_A_sum,
          t11_A_c_sum,
          t11_11_sum, 
          t11_11_c_sum, 
          t11_8_sum,
          t11_8_c_sum,
          t12_2_sum,
          t12_2_c_sum,
          t12_8_sum,
          t12_8_c_sum,
          t13_A_sum,
          t13.2_A_sum,
          t13_A_c_sum,
          t14_A_sum,
          t14.2_A_sum,
          t14_A_c_sum,
          t14_A_c2_sum,
          t13_8_sum,
          t13.2_8_sum,
          t13_8_c_sum,
          t14_8_sum,
          t14_8_c_sum,
          t14_11_sum,
          t14.2_11_sum,
          t14_11_c_sum,
          t14_11_c2_sum)

#idx names of the various miseq runs
names(Z)<-names_samples

Import the quantity of specific indels

The specific type of indels are determined.

One sample is one time point in one time series

## logical(0)

Order samples

#Some timeseries are spread over multiple read indexes. 
#group one timeseries together in one variable

#sum of the various types 
t1_order <- c(t1_2_c[1], t1)
t2_order <- c(t1_2_c[7], t2)
t3_order <- c(t3_c[1], t3)
t3_N_order <- c(t3_c[1], t3_N)
t4_order <- c(t4_N_c[1], t4)
t4_N_order <- c(t4_N_c[7], t4_N)
t7_LBR2_order <- c(t7_c[1], t7)
t7_LBR2_N_order <- c(t7_c[7], t7_N)
t6_AAVS1_order <- c(t6_8_A_c[1], t6_A)
t8_AAVS1_order <- c(t6_8_A_c[7], t8_A)
t6_fLAD_order <- c(t6_8_11_c[1], t6_11)
t8_fLAD_order <- c(t6_8_11_c[7], t8_11)
t9_order <- c(t9_c[1], t9)
t9_N_order <- c(t9_c[7], t9_N)
t10_2_order <- c(t10_2_c[1], t10_2)
t10_8_order <- c(t10_8_c[1], t10_8)
t11_AAVS1_order <- c(t11_A_c[1], t11_A)
t11_fLAD_order <- c(t11_11_c[1], t11_11)
t11_LBR8_order <- c(t11_8_c[1], t11_8)

#t11_AAVS1_order <- c(t11_A_c[1], t11_A, t11_A_c[7])
#t11_fLAD_order <- c(t11_11_c[1], t11_11, t11_11_c[7])
#t11_LBR8_order <- c(t11_8_c[1], t11_8, t11_8_c[7])

t12_LBR2_order <- c(t12_2_c[1], t12_2)
t12_LBR8_order <- c(t12_8_c[1], t12_8)
t13_AAVS1_order <- c(t13_A_c[3], t13_A, t13_A_c[1:2])
t13_2_AAVS1_order <- c(t13_A_c[9], t13.2_A, t13_A_c[7:8])
t14_AAVS1_order <- c(t14_A_c[4], t14_A, t14_A_c[1:3])
t14_2_AAVS1_order <- c(t14_A_c[10], t14.2_A, t14_A_c[7:9])
t13_LBR8_order <- c(t13_8_c[3], t13_8, t13_8_c[1:2])
t13_2_LBR8_order <- c(t13_8_c[9], t13.2_8, t13_8_c[7:8])
t14_LBR8_order <- c(t14_8_c[4], t14_8, t14_8_c[1:3])
t14_fLAD_order <- c(t14_11_c[4], t14_11, t14_11_c[1:3])
t14_2_fLAD_order <- c(t14_11_c[10], t14.2_11, t14_11_c[7:9])

t1_c_order <- c(t1_2_c[1:3],t1_2_c[5:6])
t2_c_order <- c(t1_2_c[7:12])
t3_c_order <- c(t3_c[1:6])
t3_N_c_order <- c(t3_c[7:8])
t4_c_order <- c(t4_N_c[1:6])
t4_N_c_order <- c(t4_N_c[7:12])
t7_c_order <- c(t7_c[1:6])
t7_N_c_order <- c(t7_c[7:12])
t6_AAVS1_c_order <- c(t6_8_A_c[1:6])
t6_fLAD_c_order <- c(t6_8_11_c[1:6])
t8_AAVS1_c_order <- c(t6_8_A_c[7:12])
t8_fLAD_c_order <- c(t6_8_11_c[7:12])
t9_c_order <- c(t9_c[1:6])
t9_N_c_order <- c(t9_c[7:12])
t10_2_c_order <- c(t10_2_c[1:6])
t10_8_c_order <- c(t10_8_c[1:6])
t11_AAVS1_c_order <- c(t11_A_c[1:6])
t11_fLAD_c_order <- c(t11_11_c[1:6])
t11_LBR8_c_order <- c(t11_8_c[c(1:4,6,5)])
t12_LBR2_c_order <- c(t12_2_c[1:6])
t12_LBR8_c_order <- c(t12_8_c[1:6])
t13_AAVS1_c_order   <- c(t13_A_c[3:6])
t13_2_AAVS1_c_order <- c(t13_A_c[9:12])
t14_AAVS1_c_order   <- c(t14_A_c[4:6], t14_A_c2[1])
t14_2_AAVS1_c_order <- c(t14_A_c[10:12], t14_A_c2[2])
t13_LBR8_c_order    <- c(t13_8_c[3:6])
t13_2_LBR8_c_order  <- c(t13_8_c[9:12])
t14_LBR8_c_order    <- c(t14_8_c[4:7])
t14_fLAD_c_order    <- c(t14_11_c[4:6], t14_11_c2[3])
t14_2_fLAD_c_order  <- c(t14_11_c[10:12], t14_11_c2[4])

Y_order <- list(t1_order,
                t2_order,
                t3_order,
                t3_N_order,
                t4_order,
                t4_N_order,
                t7_LBR2_order,
                t7_LBR2_N_order,
                t6_AAVS1_order,
                t8_AAVS1_order,
                t6_fLAD_order,
                t8_fLAD_order,
                t1_c_order,
                t2_c_order,
                t3_c_order,
                t3_N_c_order,
                t4_c_order,
                t4_N_c_order,
                t7_c_order,
                t7_N_c_order,
                t6_AAVS1_c_order,
                t6_fLAD_c_order,
                t8_AAVS1_c_order,
                t8_fLAD_c_order,
                t9_order, 
                t9_N_order,
                t9_c_order,
                t9_N_c_order,
                t10_2_order,
                t10_2_c_order,
                t10_8_order,
                t10_8_c_order, 
                t11_AAVS1_order,
                t11_fLAD_order,
                t11_LBR8_order,
                t12_LBR2_order,
                t12_LBR8_order, 
                t11_AAVS1_c_order,
                t11_fLAD_c_order,
                t11_LBR8_c_order,
                t12_LBR2_c_order,
                t12_LBR8_c_order,
                t13_AAVS1_order,
                t13_2_AAVS1_order,
                t14_AAVS1_order,
                t14_2_AAVS1_order,
                t13_LBR8_order,
                t13_2_LBR8_order,
                t14_LBR8_order,
                t14_fLAD_order,
                t14_2_fLAD_order, 
                t13_AAVS1_c_order,
                t13_2_AAVS1_c_order,
                t14_AAVS1_c_order,
                t14_2_AAVS1_c_order,
                t13_LBR8_c_order,
                t13_2_LBR8_c_order,
                t14_LBR8_c_order,
                t14_fLAD_c_order,
                t14_2_fLAD_c_order)

#time series names
names_order <- c("t1","t2", "t3", "t3_N", "t4", "t4_N", "t7", "t7_N", "t6_A","t8_A", "t6_f", "t8_f", "t1_c", "t2_c", "t3_c", "t3_N_c", "t4_c", "t4_N_c", "t7_c", "t7_N_c", "t6_A_c", "t6_f_c", "t8_A_c", "t8_f_c", "t9", "t9_N", "t9_c", "t9_N_c", "t10_2", "t10_2_c", "t10_8", "t10_8_c", "t11_A", "t11_f", "t11_8", "t12_2", "t12_8", "t11_A_c", "t11_f_C", "t11_8_c","t12_2_C", "t12_8_c", "t13_A", "t13_2_A", "t14_A", "t14_2_A", "t13_8", "t13_2_8", "t14_8", "t14_f", "t14_2_f", "t13_A_c", "t13_2_A_c", "t14_A_c", "t14_2_A_c","t13_8_c","t13_2_8", "t14_8_c", "t14_f_c", "t14_2_f_c")

names(Y_order)<-names_order

#total amount of reads per sample
t1_sum_order <- c(t1_2_c_sum[1], t1_sum)
t2_sum_order <- c(t1_2_c_sum[7], t2_sum)
t3_sum_order <- c(t3_c_sum[1], t3_sum)
t3_N_sum_order <- c(t3_c_sum[1], t3_N_sum)
t4_sum_order <- c(t4_N_c_sum[1], t4_sum)
t4_N_sum_order <- c(t4_N_c_sum[7], t4_N_sum)
t7_LBR2_sum_order <- c(t7_c_sum[1], t7_sum)
t7_LBR2_N_sum_order <- c(t7_c_sum[7], t7_N_sum)
t6_AAVS1_sum_order <- c(t6_8_A_c_sum[1], t6_A_sum)
t8_AAVS1_sum_order <- c(t6_8_A_c_sum[7], t8_A_sum)
t6_fLAD_sum_order <- c(t6_8_11_c_sum[1], t6_11_sum)
t8_fLAD_sum_order <- c(t6_8_11_c_sum[1], t8_11_sum)
t9_sum_order <- c(t9_c_sum[1], t9_sum)
t9_N_sum_order <- c(t9_c_sum[7], t9_N_sum)
t10_2_sum_order <- c(t10_2_c_sum[1], t10_2_sum)
t10_8_sum_order <- c(t10_8_c_sum[1], t10_8_sum)
t11_AAVS1_sum_order <- c(t11_A_c_sum[1], t11_A_sum)
t11_fLAD_sum_order <- c(t11_11_c_sum[1], t11_11_sum)
t11_LBR8_sum_order <- c(t11_8_c_sum[1], t11_8_sum)

#t11_AAVS1_sum_order <- c(t11_A_c_sum[1], t11_A_sum, t11_A_c_sum[7])
#t11_fLAD_sum_order <- c(t11_11_c_sum[1], t11_11_sum, t11_11_c_sum[7])
#t11_LBR8_sum_order <- c(t11_8_c_sum[1], t11_8_sum, t11_8_c_sum[7])

t12_LBR2_sum_order <- c(t12_2_c_sum[1], t12_2_sum)
t12_LBR8_sum_order <- c(t12_8_c_sum[1], t12_8_sum)
t13_AAVS1_sum_order <- c(t13_A_c_sum[3], t13_A_sum, t13_A_c_sum[1:2])
t13_2_AAVS1_sum_order <- c(t13_A_c_sum[9], t13.2_A_sum, t13_A_c_sum[7:8])
t14_AAVS1_sum_order <- c(t14_A_c_sum[4], t14_A_sum, t14_A_c_sum[1:3])
t14_2_AAVS1_sum_order <- c(t14_A_c_sum[10], t14.2_A_sum, t14_A_c_sum[7:9])
t13_LBR8_sum_order <- c(t13_8_c_sum[3], t13_8_sum, t13_8_c_sum[1:2])
t13_2_LBR8_sum_order <- c(t13_8_c_sum[9], t13.2_8_sum, t13_8_c_sum[7:8])
t14_LBR8_sum_order <- c(t14_8_c_sum[4], t14_8_sum, t14_8_c_sum[1:3])
t14_fLAD_sum_order <- c(t14_11_c_sum[4], t14_11_sum, t14_11_c_sum[1:3])
t14_2_fLAD_sum_order <- c(t14_11_c_sum[10], t14.2_11_sum, t14_11_c_sum[7:9])

t1_c_sum_order <- c(t1_2_c_sum[1:3],t1_2_c_sum[5:6])
t2_c_sum_order <- c(t1_2_c_sum[7:12])
t3_c_sum_order <- c(t3_c_sum[1:6])
t3_N_c_sum_order <- c(t3_c_sum[7:8])
t4_c_sum_order <- c(t4_N_c_sum[1:6])
t4_N_c_sum_order <- c(t4_N_c_sum[7:12])
t7_c_sum_order <- c(t7_c_sum[1:6])
t7_N_c_sum_order <- c(t7_c_sum[7:12])
t6_AAVS1_c_sum_order <- c(t6_8_A_c_sum[1:6])
t6_fLAD_c_sum_order <- c(t6_8_11_c_sum[1:6])
t8_AAVS1_c_sum_order <- c(t6_8_A_c_sum[7:12])
t8_fLAD_c_sum_order <- c(t6_8_11_c_sum[7:12])
t9_c_sum_order <- c(t9_c_sum[1:6])
t9_N_c_sum_order <- c(t9_c_sum[7:12])
t10_2_c_sum_order <- c(t10_2_c_sum[1:6])
t10_8_c_sum_order <- c(t10_8_c_sum[1:6])
t11_AAVS1_c_sum_order <- c(t11_A_c_sum[1:6])
t11_fLAD_c_sum_order <- c(t11_11_c_sum[1:6])
t11_LBR8_c_sum_order <- c(t11_8_c_sum[c(1:4,6,5)])
t12_LBR2_c_sum_order <- c(t12_2_c_sum[1:6])
t12_LBR8_c_sum_order <- c(t12_8_c_sum[1:6])
t13_AAVS1_c_sum_order   <- c(t13_A_c_sum[3:6])
t13_2_AAVS1_c_sum_order <- c(t13_A_c_sum[9:12])
t14_AAVS1_c_sum_order   <- c(t14_A_c_sum[4:6], t14_A_c2_sum[1])
t14_2_AAVS1_c_sum_order <- c(t14_A_c_sum[10:12], t14_A_c2_sum[2])
t13_LBR8_c_sum_order    <- c(t13_8_c_sum[3:6])
t13_2_LBR8_c_sum_order  <- c(t13_8_c_sum[9:12])
t14_LBR8_c_sum_order    <- c(t14_8_c_sum[4:7])
t14_fLAD_c_sum_order    <- c(t14_11_c_sum[4:6], t14_11_c2_sum[3])
t14_2_fLAD_c_sum_order  <- c(t14_11_c_sum[10:12], t14_11_c2_sum[4])

Z_order <- list(t1_sum_order,
                t2_sum_order,
                t3_sum_order,
                t3_N_sum_order,
                t4_sum_order,
                t4_N_sum_order,
                t7_LBR2_sum_order,
                t7_LBR2_N_sum_order,
                t6_AAVS1_sum_order,
                t8_AAVS1_sum_order,
                t6_fLAD_sum_order,
                t8_fLAD_sum_order,
                t1_c_sum_order,
                t2_c_sum_order,
                t3_c_sum_order,
                t3_N_c_sum_order,
                t4_c_sum_order,
                t4_N_c_sum_order,
                t7_c_sum_order,
                t7_N_c_sum_order,
                t6_AAVS1_c_sum_order,
                t6_fLAD_c_sum_order,
                t8_AAVS1_c_sum_order,
                t8_fLAD_c_sum_order,
                t9_sum_order,
                t9_N_sum_order,
                t9_c_sum_order,
                t9_N_c_sum_order,
                t10_2_sum_order,
                t10_2_c_sum_order,
                t10_8_sum_order,
                t10_8_c_sum_order,
                t11_AAVS1_sum_order,
                t11_fLAD_sum_order,
                t11_LBR8_sum_order,
                t12_LBR2_sum_order,
                t12_LBR8_sum_order, 
                t11_AAVS1_c_sum_order,
                t11_fLAD_c_sum_order,
                t11_LBR8_c_sum_order,
                t12_LBR2_c_sum_order,
                t12_LBR8_c_sum_order,
                t13_AAVS1_sum_order,
                t13_2_AAVS1_sum_order,
                t14_AAVS1_sum_order,
                t14_2_AAVS1_sum_order,
                t13_LBR8_sum_order,
                t13_2_LBR8_sum_order,
                t14_LBR8_sum_order,
                t14_fLAD_sum_order,
                t14_2_fLAD_sum_order, 
                t13_AAVS1_c_sum_order,
                t13_2_AAVS1_c_sum_order,
                t14_AAVS1_c_sum_order,
                t14_2_AAVS1_c_sum_order,
                t13_LBR8_c_sum_order,
                t13_2_LBR8_c_sum_order,
                t14_LBR8_c_sum_order,
                t14_fLAD_c_sum_order,
                t14_2_fLAD_c_sum_order)

#time series names
names(Z_order)<- names_order

#type of mutations, score of the mutation, sequence read, grouped per timeseries
timeseries1_order <- c(timeseries1_2_c[1], timeseries1)
timeseries2_order <- c(timeseries1_2_c[7], timeseries2)
timeseries3_order <- c(timeseries3_c[1], timeseries3)
timeseries3_N_order <- c(timeseries3_c[1], timeseries3_N)
timeseries4_order <- c(timeseries4_N_c[1], timeseries4)
timeseries4_N_order <- c(timeseries4_N_c[7], timeseries4_N)
timeseries7_LBR2_order <- c(timeseries7_c[1], timeseries7)
timeseries7_LBR2_N_order <- c(timeseries7_c[7], timeseries7_N)
timeseries6_AAVS1_order <- c(timeseries6_8_A_c[1], timeseries6_A)
timeseries8_AAVS1_order <- c(timeseries6_8_A_c[7], timeseries8_A)
timeseries6_fLAD_order <- c(timeseries6_8_11_c[1], timeseries6_11)
timeseries8_fLAD_order <- c(timeseries6_8_11_c[1], timeseries8_11)
timeseries9_order <- c(timeseries9_c[1], timeseries9)
timeseries9_N_order <- c(timeseries9_c[7], timeseries9_N)
timeseries10_2_order <- c(timeseries10_2_c[1], timeseries10_2)
timeseries10_8_order <- c(timeseries10_8_c[1], timeseries10_8)
timeseries11_AAVS1_order <- c(timeseries11_A_c[1], timeseries11_A)
timeseries11_fLAD_order <- c(timeseries11_11_c[1], timeseries11_11)
timeseries11_LBR8_order <- c(timeseries11_8_c[1], timeseries11_8)

#timeseries11_AAVS1_order <- c(timeseries11_A_c[1], timeseries11_A, timeseries11_A_c[7])
#timeseries11_fLAD_order <- c(timeseries11_11_c[1], timeseries11_11, timeseries11_11_c[7])
#timeseries11_LBR8_order <- c(timeseries11_8_c[1], timeseries11_8, timeseries11_8_c[7])

timeseries12_LBR2_order <- c(timeseries12_2_c[1], timeseries12_2)
timeseries12_LBR8_order <- c(timeseries12_8_c[1], timeseries12_8)
timeseries13_AAVS1_order <- c(timeseries13_A_c[3], timeseries13_A, timeseries13_A_c[1:2])
timeseries13_2_AAVS1_order <- c(timeseries13_A_c[9], timeseries13.2_A, timeseries13_A_c[7:8])
timeseries14_AAVS1_order <- c(timeseries14_A_c[4], timeseries14_A, timeseries14_A_c[1:3])
timeseries14_2_AAVS1_order <- c(timeseries14_A_c[10], timeseries14.2_A, timeseries14_A_c[7:9])
timeseries13_LBR8_order <- c(timeseries13_8_c[3], timeseries13_8, timeseries13_8_c[1:2])
timeseries13_2_LBR8_order <- c(timeseries13_8_c[9], timeseries13.2_8, timeseries13_8_c[7:8])
timeseries14_LBR8_order <- c(timeseries14_8_c[4], timeseries14_8, timeseries14_8_c[1:3])
timeseries14_fLAD_order <- c(timeseries14_11_c[4], timeseries14_11, timeseries14_11_c[1:3])
timeseries14_2_fLAD_order <- c(timeseries14_11_c[10], timeseries14.2_11, timeseries14_11_c[7:9])

timeseries1_c_order <- c(timeseries1_2_c[1:3],timeseries1_2_c[5:6])
timeseries2_c_order <- c(timeseries1_2_c[7:12])
timeseries3_c_order <- c(timeseries3_c[1:6])
timeseries3_N_c_order <- c(timeseries3_c[7:8])
timeseries4_c_order <- c(timeseries4_N_c[1:6])
timeseries4_N_c_order <- c(timeseries4_N_c[7:12])
timeseries7_c_order <- c(timeseries7_c[1:6])
timeseries7_N_c_order <- c(timeseries7_c[7:12])
timeseries6_AAVS1_c_order <- c(timeseries6_8_A_c[1:6])
timeseries6_fLAD_c_order <- c(timeseries6_8_11_c[1:6])
timeseries8_AAVS1_c_order <- c(timeseries6_8_A_c[7:12])
timeseries8_fLAD_c_order <- c(timeseries6_8_11_c[7:12])
timeseries9_c_order <- c(timeseries9_c[1:6])
timeseries9_N_c_order <- c(timeseries9_c[7:12])
timeseries10_2_c_order <- c(timeseries10_2_c[1:6])
timeseries10_8_c_order <- c(timeseries10_8_c[1:6])
timeseries11_AAVS1_c_order <- c(timeseries11_A_c[1:6])
timeseries11_fLAD_c_order <- c(timeseries11_11_c[1:6])
timeseries11_LBR8_c_order <- c(timeseries11_8_c[c(1:4,6,5)])
timeseries12_LBR2_c_order <- c(timeseries12_2_c[1:6])
timeseries12_LBR8_c_order <- c(timeseries12_8_c[1:6])
timeseries13_AAVS1_c_order   <- c(timeseries13_A_c[3:6])
timeseries13_2_AAVS1_c_order <- c(timeseries13_A_c[9:12])
timeseries14_AAVS1_c_order   <- c(timeseries14_A_c[4:6], timeseries14_A_c2[1])
timeseries14_2_AAVS1_c_order <- c(timeseries14_A_c[10:12], timeseries14_A_c2[2])
timeseries13_LBR8_c_order    <- c(timeseries13_8_c[3:6])
timeseries13_2_LBR8_c_order  <- c(timeseries13_8_c[9:12])
timeseries14_LBR8_c_order    <- c(timeseries14_8_c[4:7])
timeseries14_fLAD_c_order    <- c(timeseries14_11_c[4:6], timeseries14_11_c2[3])
timeseries14_2_fLAD_c_order  <- c(timeseries14_11_c[10:12], timeseries14_11_c2[4])

X_order <- list(timeseries1_order,
                timeseries2_order,
                timeseries3_order,
                timeseries3_N_order,
                timeseries4_order,
                timeseries4_N_order,
                timeseries7_LBR2_order,
                timeseries7_LBR2_N_order,
                timeseries6_AAVS1_order,
                timeseries8_AAVS1_order,
                timeseries6_fLAD_order,
                timeseries8_fLAD_order,
                timeseries1_c_order,
                timeseries2_c_order,
                timeseries3_c_order,
                timeseries3_N_c_order,
                timeseries4_c_order,
                timeseries4_N_c_order,
                timeseries7_c_order,
                timeseries7_N_c_order,
                timeseries6_AAVS1_c_order,
                timeseries6_fLAD_c_order,
                timeseries8_AAVS1_c_order,
                timeseries8_fLAD_c_order,
                timeseries9_order,
                timeseries9_N_order,
                timeseries9_c_order,
                timeseries9_N_c_order,
                timeseries10_2_order,
                timeseries10_2_c_order,
                timeseries10_8_order,
                timeseries10_8_c_order,
                timeseries11_AAVS1_order,
                timeseries11_fLAD_order,
                timeseries11_LBR8_order,
                timeseries12_LBR2_order,
                timeseries12_LBR8_order, 
                timeseries11_AAVS1_c_order,
                timeseries11_fLAD_c_order,
                timeseries11_LBR8_c_order,
                timeseries12_LBR2_c_order,
                timeseries12_LBR8_c_order, 
                timeseries13_AAVS1_order,
                timeseries13_2_AAVS1_order,
                timeseries14_AAVS1_order,
                timeseries14_2_AAVS1_order,
                timeseries13_LBR8_order,
                timeseries13_2_LBR8_order,
                timeseries14_LBR8_order,
                timeseries14_fLAD_order,
                timeseries14_2_fLAD_order, 
                timeseries13_AAVS1_c_order,
                timeseries13_2_AAVS1_c_order,
                timeseries14_AAVS1_c_order,
                timeseries14_2_AAVS1_c_order,
                timeseries13_LBR8_c_order,
                timeseries13_2_LBR8_c_order,
                timeseries14_LBR8_c_order,
                timeseries14_fLAD_c_order,
                timeseries14_2_fLAD_c_order)


#time series names
names(X_order)<- names_order

#sum of the various indel per sample
p1_order <- c(p1_2_c[1], p1)
p2_order <- c(p1_2_c[7], p2)
p3_order <- c(p3_c[1], p3)
p3_N_order <- c(p3_c[1], p3_N)
p4_order <- c(p4_N_c[1], p4)
p4_N_order <- c(p4_N_c[7], p4_N)
p7_LBR2_order <- c(p7_c[1], p7)
p7_LBR2_N_order <- c(p7_c[7], p7_N)
p6_AAVS1_order <- c(p6_8_A_c[1], p6_A)
p8_AAVS1_order <- c(p6_8_A_c[7], p8_A)
p6_fLAD_order <- c(p6_8_11_c[1], p6_11)
p8_fLAD_order <- c(p6_8_11_c[7], p8_11)
p9_order <- c(p9_c[1], p9)
p9_N_order <- c(p9_c[7], p9_N)
p10_2_order <- c(p10_2_c[1], p10_2)
p10_8_order <- c(p10_8_c[1], p10_8)
p11_AAVS1_order <- c(p11_A_c[1], p11_A)
p11_fLAD_order <- c(p11_11_c[1], p11_11)
p11_LBR8_order <- c(p11_8_c[1], p11_8)

#p11_AAVS1_order <- c(p11_A_c[1], p11_A, p11_A_c[7])
#p11_fLAD_order <- c(p11_11_c[1], p11_11, p11_11_c[7])
#p11_LBR8_order <- c(p11_8_c[1], p11_8, p11_8_c[7])

p12_LBR2_order <- c(p12_2_c[1], p12_2)
p12_LBR8_order <- c(p12_8_c[1], p12_8)
p13_AAVS1_order <- c(p13_A_c[3], p13_A, p13_A_c[1:2])
p13_2_AAVS1_order <- c(p13_A_c[9], p13.2_A, p13_A_c[7:8])
p14_AAVS1_order <- c(p14_A_c[4], p14_A, p14_A_c[1:3])
p14_2_AAVS1_order <- c(p14_A_c[10], p14.2_A, p14_A_c[7:9])
p13_LBR8_order <- c(p13_8_c[3], p13_8, p13_8_c[1:2])
p13_2_LBR8_order <- c(p13_8_c[9], p13.2_8, p13_8_c[7:8])
p14_LBR8_order <- c(p14_8_c[4], p14_8, p14_8_c[1:3])
p14_fLAD_order <- c(p14_11_c[4], p14_11, p14_11_c[1:3])
p14_2_fLAD_order <- c(p14_11_c[10], p14.2_11, p14_11_c[7:9])

p1_c_order <- c(p1_2_c[1:3],p1_2_c[5:6])
p2_c_order <- c(p1_2_c[7:12])
p3_c_order <- c(p3_c[1:6])
p3_N_c_order <- c(p3_c[7:8])
p4_c_order <- c(p4_N_c[1:6])
p4_N_c_order <- c(p4_N_c[7:12])
p7_c_order <- c(p7_c[1:6])
p7_N_c_order <- c(p7_c[7:12])
p6_AAVS1_c_order <- c(p6_8_A_c[1:6])
p6_fLAD_c_order <- c(p6_8_11_c[1:6])
p8_AAVS1_c_order <- c(p6_8_A_c[7:12])
p8_fLAD_c_order <- c(p6_8_11_c[7:12])
p9_c_order <- c(p9_c[1:6])
p9_N_c_order <- c(p9_c[7:12])
p10_2_c_order <- c(p10_2_c[1:6])
p10_8_c_order <- c(p10_8_c[1:6])
p11_AAVS1_c_order <- c(p11_A_c[1:6])
p11_fLAD_c_order <- c(p11_11_c[1:6])
p11_LBR8_c_order <- c(p11_8_c[c(1:4,6,5)])
p12_LBR2_c_order <- c(p12_2_c[1:6])
p12_LBR8_c_order <- c(p12_8_c[1:6])
p13_AAVS1_c_order   <- c(p13_A_c[3:6])
p13_2_AAVS1_c_order <- c(p13_A_c[9:12])
p14_AAVS1_c_order   <- c(p14_A_c[4:6], p14_A_c2[1])
p14_2_AAVS1_c_order <- c(p14_A_c[10:12], p14_A_c2[2])
p13_LBR8_c_order    <- c(p13_8_c[3:6])
p13_2_LBR8_c_order  <- c(p13_8_c[9:12])
p14_LBR8_c_order    <- c(p14_8_c[4:7])
p14_fLAD_c_order    <- c(p14_11_c[4:6], p14_11_c2[3])
p14_2_fLAD_c_order  <- c(p14_11_c[10:12], p14_11_c2[4])

V_order <- list(p1_order,
                p2_order,
                p3_order,
                p3_N_order,
                p4_order,
                p4_N_order,
                p7_LBR2_order,
                p7_LBR2_N_order,
                p6_AAVS1_order,
                p8_AAVS1_order,
                p6_fLAD_order,
                p8_fLAD_order,
                p1_c_order,
                p2_c_order,
                p3_c_order,
                p3_N_c_order,
                p4_c_order,
                p4_N_c_order,
                p7_c_order,
                p7_N_c_order,
                p6_AAVS1_c_order,
                p6_fLAD_c_order,
                p8_AAVS1_c_order,
                p8_fLAD_c_order,
                p9_order, 
                p9_N_order,
                p9_c_order,
                p9_N_c_order,
                p10_2_order,
                p10_2_c_order,
                p10_8_order,
                p10_8_c_order,
                p11_AAVS1_order,
                p11_fLAD_order,
                p11_LBR8_order,
                p12_LBR2_order,
                p12_LBR8_order, 
                p11_AAVS1_c_order,
                p11_fLAD_c_order,
                p11_LBR8_c_order,
                p12_LBR8_c_order,
                p12_LBR2_c_order,
                p13_AAVS1_order,
                p13_2_AAVS1_order,
                p14_AAVS1_order,
                p14_2_AAVS1_order,
                p13_LBR8_order,
                p13_2_LBR8_order,
                p14_LBR8_order,
                p14_fLAD_order,
                p14_2_fLAD_order, 
                p13_AAVS1_c_order,
                p13_2_AAVS1_c_order,
                p14_AAVS1_c_order,
                p14_2_AAVS1_c_order,
                p13_LBR8_c_order,
                p13_2_LBR8_c_order,
                p14_LBR8_c_order,
                p14_fLAD_c_order,
                p14_2_fLAD_c_order)

#time series names
names(V_order)<- names_order

# all series grouped per sgRNA
jt_1 = c(1, 2, 3, 4, 5, 6, 7, 8, 25,26,29,36) #LBR2 (-/+ NU7741)
jt_2 = c(31,35,37, 47,48,49) #LBR8 
jt_3 = c(10, 33,43,44,45,46) #AAVS1 #9
jt_4 = c(11,12,34,50,51) #chr11 

#control samples (no sgRNA or no Shield-1)
jc_1 <- c(13,14,15,16,17,18,19,20,27,28,30,41) # control LBR2 (-/+ NU7741)
jc_2 <- c(32,40,42,56,57,58) #LBR8 
jc_3 <- c(23, 38,52,53,54,55) # control AAVS1 #21
jc_4 <- c(22,24,39,59,60) # control chr11 

Import TIDE data

#import time points data (intact & indel fractions)

#timeseries 1

##+shield (with induction)
#P1 = wildtype population timeserie 1
#Mt1 = total indels timeserie 1
#M11 = +1 insertion timeserie 1
#M12 = -7 deletion timeserie 1
#M13 = other indels timeserie 1

P1 <- c(99.10,97.1,94.8,91.9,88.5,82.4,78.6,70.8,67.2,54.4,39.9,NA, NA, 32.2, NA, NA,27.1)
Mt1 <- c(0.40, 2.2, 4.6,7.3,10.8,16.8,19.8,27.8,31.3,44.5,58.5,NA, NA, 66,NA, NA, 70.4)

M11 <- c(0.1, 0.7,4,6.9,10.1,16.3,19.3,25.5,27.9,39,45.6,NA,NA, 49.8,NA, NA, 50.8)
M12 <- c(0, 0,0,0,0,0.5,0.5,2.2,2.9,4.5,10,NA,NA,13,NA, NA,13.8)
M13 <- c(0.2, 0.8,0.6,0.4,0.7,0,0,0,0.5,1.1,2.9,NA,NA, 3.3,NA, NA,5.8)


##-shield (without induction) background
#Pn1 = wildtype population timeserie 1
#Mtn1 = total indels timeserie 1
#Mn11 = +1 insertion timeserie 1
#Mn12 = -7 deletion timeserie 1

Pn1 <- c(99.10,NA,98,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,93.4)
Mtn1 <- c(0.40,NA,1.40,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,5.6)

Mn11 <- c(0.1,NA,0.9,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,3.8)
Mn12 <- c(0,NA,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.3)

#timeseries 2

##+shield (with induction)
#P2 = wildtype population timeserie 2
#Mt2 = total indels timeserie 2
#M21 = +1 insertion timeserie 2
#M22 = -7 deletion timeserie 2
#M23 = other indels timeserie 2

P2 <- c(97.60, 97.9, 93.9, 92.4, 90.7, 85.5, 81.9, 78.1, 72.4, 66.1, 52.2, NA, NA, 45.7,NA, NA, 37.6) 
Mt2 <- c(1.60, 1.5, 4.8, 5.5, 8.4, 12.2, 16.8, 20.6, 26.0, 32.1, 45.1, NA, NA, 51.5, NA, NA, 56.8) 

M21 <- c(0, 0.1,2.7,4.7,7.4,10,14.8,17.9,20.4,24.7,32.5,NA,NA,36.6,NA,NA,41.8)
M22 <- c(0, 0.3,0,0,0.3,0.3,1.6,2,2.5,4.3,8.7,NA,NA,10.7,NA,NA,10.7)
M23 <- c(1.5, 0.9,2,0.6,0.8,1.9,0.4,0.7,3,3,3.8,NA,NA,4.2,NA,NA,4.2)


##-shield (without induction) background
#Pn2 = wildtype population timeserie 2
#Mtn2 = total indels timeserie 2
#Mn21 = +1 insertion timeserie 2
#Mn22 = -7 deletion timeserie 2

Pn2 <- c(97.60,NA,96.7,NA,NA,NA,95.6,NA,NA,94.4,NA,NA,NA,NA,NA,NA,97.4)
Mtn2 <- c(1.60,NA,2.1,NA,NA,NA,0.9,NA,NA,1.6,NA,NA,NA,NA,NA,NA,1.1)
Mn21 <- c(0,NA,0,NA,NA,NA,0.8,NA,NA,1.4,NA,NA,NA,NA,NA,NA,0.8)
Mn22 <- c(0,NA,0,NA,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,NA,0)


#timeseries 3

##+shield (with induction)
#P3 = wildtype population timeserie 3
#Mt3 = total indels timeserie 3
#M31 = +1 insertion timeserie 3
#M32 = -7 deletion timeserie 3
#M33 = other indels timeserie 3

P3<- c(98.10, 90.1, 93.6, 89.9, 89.3, 84.5, 80.1, 73.5, NA, 59.5, 42.4, 37.2, NA, 33.9, NA, NA, 25.4)
Mt3 <- c(0.80, 6.3, 4.7, 8.5, 9, 13.9, 18.1, 22.4, NA, 35.9, 54.4, 59.5, NA, 62.7, NA, NA,71.9)

M31 <- c(0, 0,1.5,6,7.5,11.1,15,16.7,NA,26.6,37.5,42.9,NA, 44.9,NA, NA,49.3)
M32 <- c(0, 0,0,0,0.7,0.5,1.4,1.6,NA,6.8,12.2,12.4,NA, 14.5,NA, NA,17)
M33 <- c(0.8,6.4,3.4,2.5,1,2.4,1.7,4.1,NA, 3,4.7,4.1,NA, 3.3,NA, NA,5.8)


##-shield (without induction) background
#Pn3 = wildtype population timeserie 3
#Mtn3 = total indels timeserie 3
#Mn31 = +1 insertion timeserie 3
#Mn32 = -7 deletion timeserie 3

Pn3 <- c(98.10,NA,96.8,NA,NA,NA,97.4,NA,NA,97.3,NA, NA, NA, NA,NA,NA,97)
Mtn3 <- c(0.80,NA,2.3,NA,NA,NA,1.5,NA,NA,1.7,NA, NA, NA,NA,NA,NA,2.2)
Mn31 <- c(0,NA,0,NA,NA,NA,0,NA,NA,0.2,NA, NA, NA,NA,NA,NA,1.7)
Mn32 <- c(0,NA,0,NA,NA,NA,0,NA,NA,0,NA, NA, NA,NA,NA,NA,0.2)


##+shield (with induction) +NU7441
#Pi3 = wildtype population timeserie 3
#Mti3 = total indels timeserie 3
#M3i1 = +1 insertion timeserie 3
#M3i2 = -7 deletion timeserie 3

Mti3<- c(0.80,4.6,3.8,3.1,6.3,8.2,11.2,15.8,NA, 26.4,46.8,53.2,NA,55.1,NA,NA, 67.4)
Pi3 <-c(98.10,92.9,95.1,95.7,92.2, 90.5, 87.5, 82.9, NA, 68.2, 46.5, 40.8,NA, 37.6, NA,NA,27.3)   
  
M3i1 <- c(0,0.4,1.4, 1.5, 2.1, 4.4, 4.6, 5.5, NA, 8.9, 16.7, 20.2, NA, 20.3, NA,NA,24.4)
M3i2 <- c(0,0.7, 1, 0.9, 2.3, 3.5, 6.2, 10.2, NA, 15, 28.8, 30.3, NA, 32, NA,NA,38.9) 

#group the replicates in one dataframe.
#total indels (+ shield)
d <- data.frame(time=time_all, intact1=P1, indel1=Mt1, intact2=P2, indel2=Mt2, intact3=P3, indel3=Mt3)

#indels split in +1 & =7 (+ shield)
d2 <- data.frame(time=time_all, ins1.1=M11, del7.1=M12, ins1.2=M21, del7.2=M22, ins1.3=M31, del7.3=M32)

#total indels (+ shield)
d3 <- data.frame(time=time_all, intact3i=Pi3, indel3i=Mti3)

#indels split in +1 & =7 (+ shield) + NU7441
d4 <- data.frame(time=time_all, ins1.3i=M3i1, del7.3i=M3i2)

#total indels (- shield)
dn <- data.frame(time=time_all, intact1=Pn1, indel1=Mtn1, intact2=Pn2, indel2=Mtn2, intact3=Pn3, indel3=Mtn3)

#indels split in +1 & =7 (- shield)
dn2 <- data.frame(time=time_all, ins1.1=Mn11, del7.1=Mn12, ins1.2=Mn21, del7.2=Mn22, ins1.3=Mn31, del7.3=Mn32)

#Order all timerseries measured with TIDE
j <- c(1,2,3)
q1 <- list(P1, P2, P3)
q2 <- list(Mt1, Mt2, Mt3)

qi2 <- list(M11, M21, M31)
qi3 <- list(M12, M22, M32)
qi4 <- list(M13, M23, M33)

Functions to extract data from sequencing files

To check the raw data counts, plot the total amount of deletions and insertions and wildtype over time. The sample are normalized to the total amount of read per timepoint in each timeseries.

#####################################
#Timeseries
#####################################

# each timepoint is divided for the total reads per sample
# the indels at t=0 are substracted from all timepoints
# notclear sample (not assigned to wt, deletion, insertion or point mutations) are discarded
# Point mutations are considered as wt reads

timeseriesplots <- function(Y_order, Z_order, jt, time_all){
  
  names_samples <- names(Y_order[jt])
 
  wildtype_tot <- mutations <- matrix(NA, nrow=length(time_all)+1)
  mutations_tot <- mutations <- matrix(NA, nrow=length(time_all)+1)
  rownames(wildtype_tot) <- rownames(mutations_tot) <- c(time_all,"U")
  
  for (q in 1:length(jt)){
    wildtype <- mutations <- matrix(NA, nrow=length(time_all))
    rownames(wildtype) <- rownames(mutations) <- time_all
    j <- jt[q]

  df <- ldply(Y_order[[j]], data.frame)
  df2 <- ddply(df, .(V2), function(x) { x$index <- 1:nrow(x); x})
  colnames(df2) <- c("tot_read", "type", "index")

  #check
  head(df)
  head(df2)
  head(Z_order[[j]][1])

  #take the ratio by dividing to total amount of read per sample to normalize for differences of read counts per input

  #check the proportion of reads that were not assigned to a specific type (not clear)
  notcl <- (df2$tot_read[df2$type=="not_clear"]/Z_order[[j]])*100
  notcl
  notcl2 <- (df2$tot_read[df2$type=="not_clear"])

  #remove the notcl reads
  del <- (as.numeric(df2$tot_read[df2$type=="del"])/ ((Z_order[[j]])-notcl2))*100
  ins <- (df2$tot_read[df2$type=="ins"]/((Z_order[[j]])-notcl2))*100
  wt <- (df2$tot_read[df2$type=="wt"]/ ((Z_order[[j]])-notcl2))*100
  pmut <- (df2$tot_read[df2$type=="wt_point_mut"]/((Z_order[[j]])-notcl2))*100

  #combine the different mutations (insertion and deletions) or non mutations (wild-type and point mutations)
  indel <- del+ins
  wt2 <- wt+pmut
  indel
  wt2
  wt2+indel

  #substract baseline = substract the reads of mutations at time point t=0 from all other timepoints
  indel <- indel-indel[1]
  indel[indel<=0] <- 0

  tot <- wt2+indel
  tot

  wt2 <- (wt2/tot)*100
  indel <- (indel/tot)*100
  wt2+indel

  ptime <- time_all
  
  #timeseries 3 had one different time point t=37 instead of t=21
  #some of controls have different time points
  time <- c(0, 4, 8, 10, 12, 14, 16, 18, 21, 24, 33, 42, 60)
  time2 <- c(0, 4, 8, 10, 12, 14, 16, 18, 24, 33, 37, 42, 60)
  time3 <- c( 0,4,8,10,12,14,16,18,21,24,33,42,60,93,115,158)
  time4 <- c(0, 4, 8, 10, 12, 14, 16, 18, 21, 24, 33, 42, 48, 54, 60)
  time5 <- c(0, 4, 8, 10, 12, 14, 16, 18, 21, 24, 33, 38, 42, 48, 54, 60)
  time_c <- c(0,8,16,24,60,70)
  time_c1 <- c(0,8,18,60,70)
  time_c2 <- c(60,70)
  time_c3 <- c(0,8,16,18, 24,60,158,165)
  time_c4 <- c(0,24,60,70)

  if((!j==3|!j==4) & length(time_all)>7){
    ptime <- time
  }

  if(j==3|j==4){
    ptime <- time2
  }
  
  if(j==43|j==44||j==47|j==48){
    ptime <- time4
  }
  
  if(j==45|j==46||j==49|j==50|j==51){
    ptime <- time5
  }
  
  if((!j==3|!j==4) & length(time_all)<14){
    ptime <- time_c
  }
  
  if(j==13){
    ptime <- time_c1
  }

  if(j==16){
    ptime <- time_c2
  }
  
   if(j==52|j==53||j==54|j==55|j==56|j==57||j==58|j==59|j==60){
    ptime <- time_c4
  }

  wildtype[as.character(ptime),] <- wt2
  mutations[as.character(ptime),] <- indel

  U <- Ui <- NA
  
  # sigmoidal function to determine untransfected fraction
  sigmoid = function(params, x) {
  1-params[1] / (1 + exp(-params[2] * (x - params[3])))
  }

  df6 <- data.frame(time=ptime, wt=wt2,indel=indel)
  df6[,2:3] <- df6[,2:3]/100

  x=df6[,1];
  y=df6[,2];
  z=df6[,3];
  
  if(length(time_all)>7){
  fitmodel_s <- nls(y~(1-a/(1 + exp(-b * (x-c)))), start=list(a=17,b=1,c=1))
  U = coef(fitmodel_s)[1];
  Ui = 1-coef(fitmodel_s)[1];
  } 
  
  if(length(time_all)<14){
  U = df6$indel[which(df6$time==60)];
  Ui = 1-U;
  } 
  
  wildtype1 <- rbind(wildtype, Ui*100)
  mutations1 <- rbind(mutations, U*100)

  wildtype_tot <- cbind(wildtype_tot, wildtype1)
  mutations_tot <- cbind(mutations_tot, mutations1)
  
  #plot progress of wt and indels in time 
   plot(time_all,wildtype, xlab='time (h)',ylab='wt/(indel+wt) (%)', main=paste('series', names_samples[q]), xlim=c(0,max(time_all)),ylim=c(0,100),pch=20,cex=1.5, col=2)
  
  plot(time_all,mutations, type="p", xlab='time (h)', ylab='indel/(indel+wt) (%)', main=paste('series', names_samples[q]), xlim=c(0,max(time_all)), ylim=c(0,100), pch=20, cex=1.5, col=3)
  }
  
  wildtype_tot <- as.matrix(wildtype_tot[,-1])
  mutations_tot <- as.matrix(mutations_tot[,-1])
  
  if(ncol(wildtype_tot)>1){
    colnames(wildtype_tot) <- colnames(mutations_tot) <- paste('index', jt)
  }
 
  return(list(
      wildtype_tot=wildtype_tot,
      mutations_tot=mutations_tot,
      names_samples=names_samples))
}

#####################################
#Timeseries keep background indels at t=0
#####################################

timeseriesplotsb <- function(Y_order, Z_order, jt, time_all){
  
  names_samples <- names(Y_order[jt])
  
  wildtype_tot <- mutations <- matrix(NA, nrow=length(time_all)+1)
  mutations_tot <- mutations <- matrix(NA, nrow=length(time_all)+1)
  rownames(wildtype_tot) <- rownames(mutations_tot) <- c(time_all,"U")
  
  for (q in 1:length(jt)){
    wildtype <- mutations <- matrix(NA, nrow=length(time_all))
    rownames(wildtype) <- rownames(mutations) <- time_all
    j <- jt[q]
    
    df <- ldply(Y_order[[j]], data.frame)
    df2 <- ddply(df, .(V2), function(x) { x$index <- 1:nrow(x); x})
    colnames(df2) <- c("tot_read", "type", "index")
    
    #check
    head(df)
    head(df2)
    head(Z_order[[j]][1])
    
    #take the ratio by dividing to total amount of read per sample to normalize for difference of reads counts per input
    
    #check the proportion of reads that were not assigned to a specific type (not clear)
    notcl <- (df2$tot_read[df2$type=="not_clear"]/Z_order[[j]])*100
    notcl
    notcl2 <- (df2$tot_read[df2$type=="not_clear"])
    
    #remove the notcl reads
    del <- (as.numeric(df2$tot_read[df2$type=="del"])/ ((Z_order[[j]])-notcl2))*100
    ins <- (df2$tot_read[df2$type=="ins"]/((Z_order[[j]])-notcl2))*100
    wt <- (df2$tot_read[df2$type=="wt"]/ ((Z_order[[j]])-notcl2))*100
    pmut <- (df2$tot_read[df2$type=="wt_point_mut"]/((Z_order[[j]])-notcl2))*100
    
    #combine the different mutations (insertion and deletions) or non mutations (wild-type and point mutations)
    indel <- del+ins
    wt2 <- wt+pmut
    indel
    wt2
    wt2+indel
    
    #substract baseline = substract the reads of mutations at time point t=0 from all other timepoints
  #  indel <- indel-indel[1]
  #  indel[indel<=0] <- 0
    
    tot <- wt2+indel
    tot
    
    wt2 <- (wt2/tot)*100
    indel <- (indel/tot)*100
    wt2+indel
    
    ptime <- time_all
    
    #timeseries 3 had one different time point t=37 instead of t=21
    #some of controls have different time points
    time <- c(0, 4, 8, 10, 12, 14, 16, 18, 21, 24, 33, 42, 60)
    time2 <- c(0, 4, 8, 10, 12, 14, 16, 18, 24, 33, 37, 42, 60)
    time3 <- c( 0,4,8,10,12,14,16,18,21,24,33,42,60,93,115,158)
    time4 <- c(0, 4, 8, 10, 12, 14, 16, 18, 21, 24, 33, 42, 48, 54, 60)
    time5 <- c(0, 4, 8, 10, 12, 14, 16, 18, 21, 24, 33, 38, 42, 48, 54, 60)
    time_c <- c(0,8,16,24,60,70)
    time_c1 <- c(0,8,18,60,70)
    time_c2 <- c(60,70)
    time_c3 <- c(0,8,16,18, 24,60,158,165)
    time_c4 <- c(0,24,60,70)
    
    if((!j==3|!j==4) & length(time_all)>7){
      ptime <- time
    }
    
    if(j==3|j==4){
      ptime <- time2
    }
    
    if(j==43|j==44||j==47|j==48){
      ptime <- time4
    }
    
    if(j==45|j==46||j==49|j==50|j==51){
      ptime <- time5
    }
    
    if((!j==3|!j==4) & length(time_all)<14){
      ptime <- time_c
    }
    
    if(j==13){
      ptime <- time_c1
    }
    
    if(j==16){
      ptime <- time_c2
    }
    
    if(j==52|j==53||j==54|j==55|j==56|j==57||j==58|j==59|j==60){
      ptime <- time_c4
    }
    
    wildtype[as.character(ptime),] <- wt2
    mutations[as.character(ptime),] <- indel
    
    U <- Ui <- NA
    
    # sigmoidal function to determine untransfected fraction
    sigmoid = function(params, x) {
      1-params[1] / (1 + exp(-params[2] * (x - params[3])))
    }
    
    df6 <- data.frame(time=ptime, wt=wt2,indel=indel)
    df6[,2:3] <- df6[,2:3]/100
    
    x=df6[,1];
    y=df6[,2];
    z=df6[,3];
    
    if(length(time_all)>7){
      fitmodel_s <- nls(y~(1-a/(1 + exp(-b * (x-c)))), start=list(a=17,b=1,c=1))
      U = coef(fitmodel_s)[1];
      Ui = 1-coef(fitmodel_s)[1];
    } 
    
    if(length(time_all)<14){
      U = df6$indel[which(df6$time==60)];
      Ui = 1-U;
    } 
    
    wildtype1 <- rbind(wildtype, Ui*100)
    mutations1 <- rbind(mutations, U*100)
    
    wildtype_tot <- cbind(wildtype_tot, wildtype1)
    mutations_tot <- cbind(mutations_tot, mutations1)
    
    #plot progress of wt and indels in time 
    plot(time_all,wildtype, xlab='time (h)',ylab='wt/(indel+wt) (%)', main=paste('series', names_samples[q]), xlim=c(0,max(time_all)),ylim=c(0,100),pch=20,cex=1.5, col=2)
    
    plot(time_all,mutations, type="p", xlab='time (h)', ylab='indel/(indel+wt) (%)', main=paste('series', names_samples[q]), xlim=c(0,max(time_all)), ylim=c(0,100), pch=20, cex=1.5, col=3)
  }
  
  wildtype_tot <- as.matrix(wildtype_tot[,-1])
  mutations_tot <- as.matrix(mutations_tot[,-1])
  
  if(ncol(wildtype_tot)>1){
    colnames(wildtype_tot) <- colnames(mutations_tot) <- paste('index', jt)
  }
  
  return(list(
    wildtype_tot=wildtype_tot,
    mutations_tot=mutations_tot,
    names_samples=names_samples))
}


#####################################
#Normalize timeseries to untransfected fraction
#####################################

# normalize all input timeseries to a value (default is untransfected fraction) correcting for the variation of transfections

timeseriesplots_norm <- function(jt, time_all, wildtype_tot, mutations_tot, time_c_all=NA, mutations_c_tot=NA, norm_m=NA){
  
wildtype2 <- cbind(wildtype_tot, mean=rowMeans(wildtype_tot))
mutations2 <- cbind(mutations_tot, mean=rowMeans(mutations_tot))

wildtype2
mutations2

if(is.na(norm_m)){
  namesA <- as.character(c(time_all, 'U'))
} else{
  namesA <- as.character(c(time_all))
}

#determine factor to normalize all timeseries
if(is.na(norm_m)){
  U <- 1
  norm_m <- mutations2[nrow(mutations2),(length(jt)+U)]
} else{
  U <- 0
}

norm2_m <- (norm_m/(as.matrix(mutations_tot)[nrow(mutations2),]))
norm_m
norm2_m

norm_w <- wildtype2[nrow(wildtype2),(length(jt)+U)]
norm2_w <- (norm_w/(as.matrix(wildtype_tot)[nrow(wildtype2),]))
norm_w
norm2_w

#indels correction
mutations3 <- matrix(NA, nrow=length(time_all)+U, ncol=length(jt))
for (q in 1:length(jt)){
mutations3[,q] <- as.matrix(mutations_tot)[,q]*norm2_m[q]
}

rownames(mutations3)<- namesA
colnames(mutations3)<- paste("index", jt)


#wildtype correction
wildtype4 <- matrix(NA, nrow=length(time_all)+U, ncol=length(jt))
for (q in 1:length(jt)){
  
  if(is.na(norm_m)){
wildtype4[,q] <- ((as.matrix(wildtype_tot)[,q]-as.matrix(wildtype_tot)[nrow(as.matrix(wildtype_tot)),q])*norm2_m[q])+norm_w
  } else {
  wildtype4[,q] <- 100-mutations3[,q]
  norm_w <- 100-norm_m}
}

rownames(wildtype4)<-namesA
colnames(wildtype4)<- paste("index", jt)

if(!is.na(mutations_c_tot)){
mutations3c <- matrix(NA, nrow=length(time_c_all)+1, ncol=length(jt))
for (q in 1:length(jt)){
mutations3c[,q] <- as.matrix(mutations_c_tot)[,q]*norm2_m[q]
}

rownames(mutations3c)<- as.character(c(time_c_all, 'U'))
colnames(mutations3c)<- paste("index", jt)
}

return(list(
  mutations3=mutations3,
  wildtype4=wildtype4,
  mutations3c=mutations3c,
  norm2_m=norm2_m,
  norm_w=norm_w, 
  norm_m=norm_m))

}

#####################################
#Timeseries TIDE
#####################################

timeseriesplots_TIDE <- function(time_all, j, q1, q2){
  
wildtype_tot <- mutations <- matrix(NA, nrow=length(time_all)+1)
mutations_tot <- mutations <- matrix(NA, nrow=length(time_all)+1)
rownames(wildtype_tot) <- rownames(mutations_tot) <- c(time_all,"U")

for (i in 1:length(j)){

wildtype <- mutations <- matrix(NA, nrow=length(time_all))
rownames(wildtype) <- rownames(mutations) <- time_all
wt <- q1[[i]]
indel <- q2[[i]]

#substract baseline 
indel <- indel-indel[1]

indel[indel<=0] <- 0

tot <- wt+indel
tot

wt <- (wt/tot)*100
indel <- (indel/tot)*100
wt+indel

ptime <- time_all


wildtype[as.character(ptime),] <- wt
mutations[as.character(ptime),] <- indel

# sigmoidal function
sigmoid = function(params, x) {
  1-params[1] / (1 + exp(-params[2] * (x - params[3])))
}

df6 <- data.frame(time=ptime, wt=wt,indel=indel)
df6[,2:3] <- df6[,2:3]/100

# fitting untransfected
x=df6[,1];
y=df6[,2];
z=df6[,3];
  
fitmodel <- nls(y~(1-a/(1 + exp(-b * (x-c)))), start=list(a=0.3,b=1,c=1))

U = coef(fitmodel)[1];
Ui = 1-coef(fitmodel)[1];

wildtype1 <- rbind(wildtype, Ui*100)
mutations1 <- rbind(mutations, U*100)

wildtype_tot <- cbind(wildtype_tot, wildtype1)
mutations_tot <- cbind(mutations_tot, mutations1)

}
wildtype_tot <- wildtype_tot[,-1]
mutations_tot <- mutations_tot[,-1]
colnames(wildtype_tot) <- colnames(mutations_tot) <- paste('index', j)

return(list(
  wildtype_tot=wildtype_tot,
  mutations_tot=mutations_tot))
}


#####################################
#Normalized timeseries TIDE
#####################################

timeseriesplots_TIDE_norm <- function(time_all, j, wildtype_tot, mutations_tot, norm_m=NA){

wildtype2 <- cbind(wildtype_tot, mean=rowMeans(wildtype_tot))
mutations2 <- cbind(mutations_tot, mean=rowMeans(mutations_tot))

if(is.na(norm_m)){
  namesA <- as.character(c(time_all, 'U'))
} else{
  namesA <- as.character(c(time_all))
}

#determine factor to normalize all timeseries
if(is.na(norm_m)){
  U <- 1
  norm_m <- mutations2[nrow(mutations2),(ncol(wildtype_tot)+1)]
} else {
  U <- 0
}
norm2_m <- (norm_m/(mutations_tot[nrow(mutations2),]))

norm_w <- wildtype2[nrow(wildtype2),(ncol(wildtype_tot)+1)]
norm2_w <- (norm_w/(wildtype_tot[1,]))

#indels
mutations3 <- matrix(NA, nrow=length(time_all)+U, ncol=ncol(wildtype_tot))
for (q in 1:ncol(wildtype_tot)){
  mutations3[,q] <- mutations_tot[,q]*norm2_m[q]
}

rownames(mutations3)<-namesA

#wildtype
wildtype4 <- matrix(NA, nrow=length(time_all)+U, ncol=ncol(wildtype_tot))
for (q in 1:ncol(wildtype_tot)){

    if(is.na(norm_m)){
wildtype4[,q] <- ((as.matrix(wildtype_tot)[,q]-as.matrix(wildtype_tot)[nrow(as.matrix(wildtype_tot)),q])*norm2_m[q])+norm_w
  } else {
  wildtype4[,q] <- 100-mutations3[,q]
  norm_w <- 100-norm_m}
}

rownames(wildtype4)<-namesA

sd_w <- var_w <- NA
if (length(j)>1){
sd_w <- apply(wildtype4[(1:length(time_all)),], 1, sd)
var_w <- apply(wildtype4[(1:length(time_all)),], 1, var)
}

return(list(
  mutations3=mutations3,
  wildtype4=wildtype4,
  norm2_m=norm2_m, 
  norm_w=norm_w,
  norm_m=norm_m))
}


###################################################
#Individual indels +1 insertion and -7 deletions
###################################################

# retrieve the data of the the +1 insertion and the -7 deletion from total indels
# each timepoint is corrected for the total reads per sample
# the indels at t=0 are substracted from all timeoints
# notclear sample (no assigned to wt, deletion, insertion or point mutations) are discarded
# Point mutations are considered as wt reads

timeseriesplots_indel <- function(V_order, Z_order, jt, time_all){
  
deletion7_tot <- insertion1_tot <- mut3_tot <- intact_tot <- matrix(NA, nrow=length(time_all)+1)
rownames(deletion7_tot) <- rownames(insertion1_tot) <- rownames(mut3_tot) <- rownames(intact_tot) <-  c(time_all,"U")

for (q in 1:length(jt)){
deletion7 <- insertion1 <- mut3 <- intact <- matrix(NA, nrow=length(time_all))
rownames(deletion7) <- rownames(insertion1) <- rownames(mut3) <- rownames(intact) <- time_all

j <- jt[q]

df <- ldply(Y_order[[j]], data.frame)
df2 <- ddply(df, .(V2), function(x) { x$index <- 1:nrow(x); x})
colnames(df2) <- c("tot_read", "type", "index")

#check
head(df)
head(df2)
head(Z_order[[j]][1])

#take the ratio by dividing to total amount of read per sample to normalize the difference of reads and/or concentration input
notcl <- (df2$tot_read[df2$type=="not_clear"]/Z_order[[j]])*100
notcl
notcl2 <- (df2$tot_read[df2$type=="not_clear"])

#any other mutation can be investigated by changes these variables
ins_1 <- 1
del_7 <- -7
wild <- 0
il <- dl <- wl <- ol <- data.frame(NA)
i1 <- d7 <- w0 <- ot <- data.frame(NA)
il_c <- dl_c <- wl_c <- ol_c <- data.frame(NA)
i1_c <- d7_c <- w0_c <- ot_c <- data.frame(NA)

i1[1] <-  d7[1] <- w0[1] <- ot[1] <- NA

for (i in 1:length(V_order[[j]])) { 
  y <- V_order[[j]][[i]]
  shiftrange <- rownames(y)
  
  A <- data.frame(indel=as.numeric(shiftrange), p=as.vector(y))
  il[i] <- A[(which(A["indel"]==ins_1)),'p'] ;    if(is.na(il[i])) {il[i]<- 0}
  dl[i] <- A[(which(A["indel"]==del_7)),'p'] ;    if(is.na(dl[i])) {dl[i]<- 0}
  wl[i] <- A[(which(A["indel"]==wild)),'p'] ;     if(is.na(wl[i])) {wl[i]<- 0}
  ol[i] <- colSums(A["p"])-il[i]-dl[i]-wl[i] ;    if(is.na(ol[i])) {ol[i]<- 0}
  
  i1[i] <- il[i]/(Z_order[[j]][[i]]-notcl2[i])*100
  d7[i] <- dl[i]/(Z_order[[j]][[i]]-notcl2[i])*100
  w0[i] <- wl[i]/(Z_order[[j]][[i]]-notcl2[i])*100
  ot[i] <- ol[i]/(Z_order[[j]][[i]]-notcl2[i])*100
}  

i1+d7+w0+ot

i1 <- as.numeric(i1)-as.numeric(i1)[1]
d7 <- as.numeric(d7)-as.numeric(d7)[1]
ot <- as.numeric(ot)-as.numeric(ot)[1]

i1[i1<=0] <- d7[d7<=0] <- ot[ot<=0] <- 0

tot2 <- i1+d7+w0+ot
tot2

i1 <- (i1/tot2)*100
d7 <- (d7/tot2)*100
w0 <- (w0/tot2)*100
ot <- (ot/tot2)*100
i1+d7+w0+ot

  ptime <- time_all
  
  #timeseries 3 had one different time point t=37 instead of t=21
  #some of controls have different time points
  time <- c(0, 4, 8, 10, 12, 14, 16, 18, 21, 24, 33, 42, 60)
  time2 <- c(0, 4, 8, 10, 12, 14, 16, 18, 24, 33, 37, 42, 60)
  time3 <- c( 0,4,8,10,12,14,16,18,21,24,33,42,60,93,115,158)
  time4 <- c(0, 4, 8, 10, 12, 14, 16, 18, 21, 24, 33, 42, 48, 54, 60)
  time5 <- c(0, 4, 8, 10, 12, 14, 16, 18, 21, 24, 33, 38, 42, 48, 54, 60)
  time_c <- c(0,8,16,24,60,70)
  time_c1 <- c(0,8,18,60,70)
  time_c2 <- c(60,70)
  time_c3 <- c(0,8,16,18, 24,60,158,165)
  time_c4 <- c(0,24,60,70)

  if((!j==3|!j==4) & length(time_all)>7){
    ptime <- time
  }

  if(j==3|j==4){
    ptime <- time2
  }
  
  if(j==43|j==44||j==47|j==48){
    ptime <- time4
  }
  
  if(j==45|j==46||j==49|j==50|j==51){
    ptime <- time5
  }

  
  if((!j==3|!j==4) & length(time_all)<14){
    ptime <- time_c
  }
  
  if(j==13){
    ptime <- time_c1
  }

  if(j==16){
    ptime <- time_c2
  }
  
   if(j==52|j==53||j==54|j==55|j==56|j==57||j==58|j==59|j==60){
    ptime <- time_c4
  }

  
insertion1[as.character(ptime),] <- t(i1)
deletion7[as.character(ptime),] <- t(d7)
mut3[as.character(ptime),] <- t(ot)
intact[as.character(ptime),] <- t(w0)
                                     
df7 <- t(rbind(ptime, i1,d7, ot, w0))
colnames(df7) <- c("time", "i1", "d7", 'm3', 'w0')
         
df7[,2:5] <- df7[,2:5]/100

# fitting untransfected
x=df7[,1];
y=df7[,2];
z=df7[,3];
k=df7[,4];
l=df7[,5];
  
  fitmodel_1 <- nls(y~(1-(1-a/(1 + exp(-b * (x-c))))), start=list(a=15,b=1,c=1))
  
  fitmodel_7 <- nls(z~(1-(1-a/(1 + exp(-b * (x-c))))), start=list(a=14,b=1,c=1))
    
  fitmodel_m <- nls(k~(1-(1-a/(1 + exp(-b * (x-c))))), start=list(a=10,b=1,c=1))

  fitmodel_0 <- nls(l~(1-a/(1 + exp(-b * (x-c)))), start=list(a=19,b=1,c=1))  

U1 = coef(fitmodel_1)[1];
U7 = coef(fitmodel_7)[1];
Um = coef(fitmodel_m)[1];
U0 = 1-coef(fitmodel_0)[1];
U02 = 1-(U1+U7+Um)

insertion1a <- rbind(insertion1, U1*100)
deletion7a <- rbind(deletion7, U7*100)
mut3a <- rbind(mut3, Um*100)
intacta <- rbind(intact, U02*100)

insertion1_tot <- cbind(insertion1_tot, insertion1a)
deletion7_tot <- cbind(deletion7_tot, deletion7a)
mut3_tot <- cbind(mut3_tot, mut3a)
intact_tot <- cbind(intact_tot , intacta)
}

insertion1_tot <- as.matrix(insertion1_tot[,-1])
deletion7_tot <- as.matrix(deletion7_tot[,-1])
mut3_tot <- as.matrix(mut3_tot[,-1])
intact_tot <- as.matrix(intact_tot[,-1])
colnames(insertion1_tot) <- colnames(deletion7_tot) <- colnames(mut3_tot) <- colnames(intact_tot) <- paste('index', jt)

return(list(
  insertion1_tot=insertion1_tot,
  deletion7_tot=deletion7_tot,
  mut3_tot=mut3_tot,
  intact_tot=intact_tot))
}

#################################################
#Normalize Individual indels +1 insertion and -7 deletions to untransfected fraction
###################################################

# To use same normalization value as for total indel with function "timeseriesplots_norm", the normalizaiton factor can be added as input

timeseriesplots_indel_norm <- function(jt, time_all, insertion1_tot, deletion7_tot, mut3_tot, intact_tot, wildtype_tot=wildtype_tot,norm2_m=norm2_m,norm_w=norm_w){

insertion1b <- cbind(insertion1_tot, rowMeans(insertion1_tot))
deletion7b <- cbind(deletion7_tot, rowMeans(deletion7_tot))
mut3b <- cbind(mut3_tot, rowMeans(mut3_tot))
intactb <- cbind(intact_tot, rowMeans(intact_tot))

#insertion
insertion1c <- matrix(NA, nrow=length(time_all)+1, ncol=length(jt))
for (q in 1:length(jt)){
  insertion1c[,q] <- insertion1_tot[,q]*norm2_m[q]
}

rownames(insertion1c[(1:length(time_all)),])<- as.character(time_all)

apply(insertion1c[(1:length(time_all)),], 1, sd)
apply(insertion1c[(1:length(time_all)),], 1, var)

#deletion
deletion7c <- matrix(NA, nrow=length(time_all)+1, ncol=length(jt))
for (q in 1:length(jt)){
  deletion7c[,q] <- deletion7_tot[,q]*norm2_m[q]
}
rownames(deletion7c[(1:length(time_all)),])<- as.character(time_all)

apply(deletion7c[(1:length(time_all)),], 1, sd)
apply(deletion7c[(1:length(time_all)),], 1, var)

#other mutations
mut3c <- matrix(NA, nrow=length(time_all)+1, ncol=length(jt))
for (q in 1:length(jt)){
  mut3c[,q] <- mut3_tot[,q]*norm2_m[q]
}

rownames(mut3c[(1:length(time_all)),])<- as.character(time_all)

apply(mut3c[(1:length(time_all)),], 1, sd)
apply(mut3c[(1:length(time_all)),], 1, var)

#wildtype
intact2 <- matrix(NA, nrow=length(time_all)+1, ncol=length(jt))
for (q in 1:length(jt)){
  intact2[,q] <- ((intact_tot[,q]-wildtype_tot[nrow(wildtype_tot),q])*norm2_m[q])+norm_w
}

rownames(intact2[(1:length(time_all)),])<- as.character(time_all)

apply(intact2[(1:length(time_all)),], 1, sd)
apply(intact2[(1:length(time_all)),], 1, var)

return(list(
  insertion1c=insertion1c,
  deletion7c=deletion7c,
  mut3c=mut3c,
  intact2=intact2))
}


###################################################
#Individual indels +1 insertion and -7 deletions TIDE
###################################################

timeseriesplots_indel_TIDE <- function(time_all, j, q1, qi2, qi3, qi4){
  
intact_tot <- matrix(NA, nrow=length(time_all)+1)
insertion1_tot <- matrix(NA, nrow=length(time_all)+1)
deletion7_tot <- matrix(NA, nrow=length(time_all)+1)
mut3_tot <- matrix(NA, nrow=length(time_all)+1)
rownames(intact_tot) <- rownames(insertion1_tot) <- rownames(deletion7_tot) <- rownames(mut3_tot) <- c(time_all,"U")


for (i in 1:length(j)){

intact <- insertion1 <- deletion7 <- mut3 <- matrix(NA, nrow=length(time_all))
rownames(intact) <- rownames(insertion1) <- rownames(deletion7) <- rownames(mut3) <- time_all
wt <- q1[[i]]
in1 <- qi2[[i]]
del7 <- qi3[[i]]
ot <- qi4[[i]]

#substract baseline 
in1 <- in1-in1[1]
in1[in1<=0] <- 0

del7 <- del7-del7[1]
del7[del7<=0] <- 0

ot <- ot-ot[1]
ot[ot<=0] <- 0

tot <- wt+in1+del7+ot
tot

wt <- (wt/tot)*100
in1 <- (in1/tot)*100
del7 <- (del7/tot)*100
ot <- (ot/tot)*100
wt+in1+del7+ot

ptime <- time_all


intact[as.character(ptime),] <- wt
insertion1[as.character(ptime),] <- in1
deletion7[as.character(ptime),] <- del7
mut3[as.character(ptime),] <- ot

# sigmoidal function
sigmoid = function(params, x) {
  1-params[1] / (1 + exp(-params[2] * (x - params[3])))
}

df6 <- data.frame(time=ptime, wt=wt,ins1=in1, del7=del7, mut3=ot)
df6[,2:5] <- df6[,2:5]/100

# fitting untransfected
x=df6[,1];
y=df6[,2];
z=df6[,3];
k=df6[,4];
l=df6[,5]

fitmodel <- nls(y~(1-a/(1 + exp(-b * (x-c)))), start=list(a=0.3,b=1,c=1))

fitmodel_1 <- nls(z~(1-(1-a/(1 + exp(-b * (x-c))))), start=list(a=0.5,b=0.1,c=20))

fitmodel_7 <- nls(k~(1-(1-a/(1 + exp(-b * (x-c))))), start=list(a=1,b=0.1,c=20))

#fitmodel_o <- nls(l~(1-(1-a/(1 + exp(-b * (x-c))))), start=list(a=0.5,b=0.1,c=20))

U = 1-coef(fitmodel)[1];
U1 = coef(fitmodel_1)[1];
U7 = coef(fitmodel_7)[1];
#Uo = coef(fitmodel_o)[1];

intact1 <- rbind(intact, U*100)
insertion1c <- rbind(insertion1, U1*100)
deletion7c <- rbind(deletion7, U7*100)
mut3c <- rbind(mut3, 5)

intact_tot <- cbind(intact_tot, intact1)
insertion1_tot <- cbind(insertion1_tot, insertion1c)
deletion7_tot <- cbind(deletion7_tot, deletion7c)
mut3_tot <- cbind(mut3_tot, mut3c)

}

intact_tot <- intact_tot[,-1]
insertion1_tot <- insertion1_tot[,-1]
deletion7_tot <- deletion7_tot[,-1]
mut3_tot <- mut3_tot[,-1]
colnames(intact_tot) <- colnames(insertion1_tot) <- colnames(deletion7_tot) <- colnames(mut3_tot) <- paste('index', j)


return(list(
  insertion1_tot=insertion1_tot,
  deletion7_tot=deletion7_tot,
  mut3_tot=mut3_tot,
  intact_tot=intact_tot))

}


#####################################
#Gompertz fitting
#####################################

gompertzfit <- function(time_all, wildtype_tot, mutations_tot){
  
  # fit gompertz function
  gompertz = function(params, x) {
    1-(params[1] * exp(-params[2] * exp(-params[3]*x)))
  }

  gompertz_indels = function(params, x) {
    (params[1] * exp(-params[2] * exp(-params[3]*x)))
  }

  df7 <- data.frame(time=time_all, wt=wildtype_tot[(1:length(time_all))],indel=mutations_tot[(1:length(time_all))])
  df7[,2:3] <- df7[,2:3]/100

# fitting
x=df7[,1];
y=df7[,2];
z=df7[,3];
  
fitmodel <- nls(y~(1-(a * exp(-b * exp(-c * x)))), start=list(a=20,b=10,c=0.1))
fitmodel_i <- nls(z~(a * exp(-b * exp(-c * x))), start=list(a=20,b=10,c=0.1))
  
sm <- rbind(coef(fitmodel), coef(fitmodel_i))
cf <- coef(fitmodel_i);
  
  plot(df7[,1],(df7[,2])*100, xlab='time (h)',ylab='fraction in the pool (%)', xlim=c(0,max(df7[,1])),ylim=c(0,100),pch=20,cex=1.5, col=2)
  lines(seq(0,max(df7[,1]),0.1),gompertz(coef(fitmodel),seq(0,max(df7[,1]),0.1))*100,lwd=2, col=2)
  
  lines(df7[,1],(df7[,3])*100, type="p", xlab='time (h)',ylab='fraction in the pool (%)',xlim=c(0,max(df7[,1])),ylim=c(0,100),pch=20, cex=1.5, col=3)
  lines(seq(0,max(df7[,1]),0.1),gompertz_indels(coef(fitmodel_i),seq(0,max(df7[,1]),0.1))*100,lwd=2, col=3)

legend("topright", legend=c("wildtype", "indels", "fit wildtype", "fit indels"), lty=c(NA,NA,1,1), pch=c(20, 20, NA, NA), col=c("red", "green",'red', 'green'), bty="n")
  
legend('bottomright',paste(c('a', 'b', 'c'), round(c((cf[1])*100, cf[2:3]),2),seqp=''),bty='n')

return(cf)
}


##################################################
#Gompertz fitting individual indels
##################################################

gompertzfit_Mutants <- function(time_all, wt, plus1, minus7, othermut){

  # fit gompertz function
  gompertz = function(params, x) {
    1-(params[1] * exp(-params[2] * exp(-params[3]*x)))
  }

  gompertz_indels = function(params, x) {
    (params[1] * exp(-params[2] * exp(-params[3]*x)))
  }
  
  df7 <- data.frame(time=time_all, i1=plus1[(1:length(time_all))], d7=minus7[(1:length(time_all))], ot=othermut[(1:length(time_all))])
  df7[,2:4] <- df7[,2:4]/100
         
# fitting untransfected
x=df7[,1];
y=df7[,2];
z=df7[,3];
z2=df7[,4];

  fitmodel_1 <- nls(y~(a * exp(-b * exp(-c * x))), start=list(a=20,b=10,c=0.1))
  fitmodel_7 <- nls(z~(a * exp(-b * exp(-c * x))), start=list(a=20,b=10,c=0.1))
  fitmodel_m <- nls(z2~(a * exp(-b * exp(-c * x))), start=list(a=20,b=10,c=0.1))

  plot(df7[,1],(df7[,2])*100, xlab='time (h)',ylab='+1 insertion (%)', pch=20, cex=1.5,col="#3B3B3B")
  lines(seq(0,max(df7[,1]),0.1),gompertz_indels(coef(fitmodel_1),seq(0,max(df7[,1]),0.1))*100,lwd=2, col="#3B3B3B")
    
  par(new=TRUE)
  
  plot(df7[,1],(df7[,3])*100, type="p", ylim=c(0, max(df7[,3], na.rm=TRUE)*100),pch=20, cex=1.5,col="#CFCFCF", xaxt="n",yaxt="n",xlab="",ylab="")
    
  lines(seq(0,max(df7[,1]),0.1),gompertz_indels(coef(fitmodel_7),seq(0,max(df7[,1]),0.1))*100,lwd=2, col="#CFCFCF")
 axis(4)
   mtext("-7 deletion (%)",side=4,line=3)
   legend("topleft",col=c("#3B3B3B","#CFCFCF", "#3B3B3B","#CFCFCF"),lty=c(NA,NA,1,1), pch=c(20, 20, NA, NA), legend=c("+1 insertion","-7 deletion", "fit +1 insertion","fit -7 deletion"), bty='n')
  
  cf1 <- coef(fitmodel_1)
  cf7 <- coef(fitmodel_7)
  cfm <- coef(fitmodel_m)
  
  gom <- rbind(coef(fitmodel_1), coef(fitmodel_7), coef(fitmodel_m))
  gom[,1] <- gom[,1]
  rownames(gom) <- c("+1", "-7",  "other_mut")

  legend('bottomright',paste(c('a', 'b', 'c'), round(gom[1,],2), round(gom[2,],2), round(gom[3,],2) ,seqp=''),bty='n')
  
return(gom)}


###################################################
#Gompertz Average
###################################################

library("Hmisc")
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:Biostrings':
## 
##     mask, translate
## The following object is masked from 'package:BiocGenerics':
## 
##     combine
## The following objects are masked from 'package:plyr':
## 
##     is.discrete, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
gompertz_av <- function(time_all, time_c_all, wildtype_tot, mutations_tot, mutations_c_tot){

mutations_tot_av <- rowMeans(mutations_tot[1:length(time_all),])
wildtype_tot_av <- rowMeans(wildtype_tot[1:length(time_all),])
mutations_c_tot_av <- rowMeans(mutations_c_tot[1:length(time_c_all),])
time_c2 <- time_c_all[-(max(time_c_all))]

sd_m <- var_m <- sd_w <- var_w <- sd_mc <- var_mc <- NA
if ( length(wildtype_tot[1,])>1){
sd_w <- apply(wildtype_tot[(1:length(time_all)),], 1, sd)
var_w <- apply(wildtype_tot[(1:length(time_all)),], 1, var)
sd_m <- apply(mutations_tot[(1:length(time_all)),], 1, sd)
var_m <- apply(mutations_tot[(1:length(time_all)),], 1, var)
sd_mc <- apply(mutations_c_tot[(1:length(time_c_all)),], 1, sd)
var_mc <- apply(mutations_c_tot[(1:length(time_c_all)),], 1, var)
}

mutations4 <- data.frame(time=time_all, mean=mutations_tot_av, sd=sd_m)
wildtype5 <- data.frame(time=time_all, mean=wildtype_tot_av, sd=sd_w)
mutations4_c <- data.frame(time=time_c_all, mean=mutations_c_tot_av, sd=sd_mc)

# fit untransfected fraction
# sigmoidal function for wildtype_gompertz
gompertz = function(params, x) {
  1-(params[1] * exp(-params[2] * exp(-params[3]*x)))
}

# sigmoidal function for indels_gompertz
gompertz_indels = function(params, x) {
  (params[1] * exp(-params[2] * exp(-params[3]*x)))
}

df7 <- data.frame(time=time_all, wt=wildtype_tot_av[(1:length(time_all))],indel=mutations_tot_av[(1:length(time_all))])
df6c <- data.frame(time=time_c_all, indel=mutations_c_tot_av)

# fitting untransfected
x=df7[,1];
y=df7[,2];
z=df7[,3];
l=df6c[,1]
k=df6c[,2]
  
y <- y/100
z <- z/100
k <- k/100

fitmodel <- nls(y~(1-(a * exp(-b * exp(-c * x)))), start=list(a=20,b=10,c=0.1))
  
U = 1-coef(fitmodel)[1];
  
fitmodel_i <- nls(z~(a * exp(-b * exp(-c * x))), start=list(a=20,b=10,c=0.1))

sm <- rbind(coef(fitmodel), coef(fitmodel_i))
cf <- coef(fitmodel_i);

#plot
plot(mutations4$time, mutations4$mean, type="n", ylim=c(0,100), xlim=c(0,60), xlab="time (h)", ylab="fraction in pool (%)")
par(fg=3) 
with (
  data = mutations4
  , expr = errbar(time, mean, mean+sd, mean-sd, add=T, pch=20, cap=.01, col=3)
)

lines(seq(0,max(df7[,1]),0.1),gompertz_indels(coef(fitmodel_i),seq(0,max(df7[,1]),0.1))*100,lwd=2, col=3)

lines(wildtype5$time, wildtype5$mean, type="n")
par(fg=2) 
with (
  data = wildtype5
  , expr = errbar(time, mean, mean+sd, mean-sd, add=T, pch=20, cap=.01, col=2)
)

lines(seq(0,max(df7[,1]),0.1),gompertz(coef(fitmodel),seq(0,max(df7[,1]),0.1))*100,lwd=2, col=2)


lines(mutations4_c$time, mutations4_c$mean, type="n")
par(fg="grey") 
with (
  data = mutations4_c
  , expr = errbar(time, mean, mean+sd, mean-sd, add=T, pch=20, cap=.01, col="gray15")
)

lines(df6c[time_c2,1],df6c[time_c2,2],col="grey15")


par(fg=1) 
legend("topright", legend=c("intact", "indel +shield", "indel -shield", paste("n =", ncol(mutations_tot)) ), pch=16, col=c(2, 3, "gray15", NA), bty="n")

}


###################################################
#Indel spectrum
###################################################

# plot all indels present in this sample

indel_spectrum <- function(V_order, Z_order, j){

for (i in 1:length(V_order[[j]])) { 
  y <- (V_order[[j]][[i]][1:length(V_order[[j]][[i]])]/Z_order[[j]][i])*100
  
  shiftrange <- rownames(y)
  
  indel.summary <-round(as.numeric(y),1)
  
  if(length(shiftrange)==0){
    shiftrange <- b_del <- b_ins <- 0
  }
  b_del <- abs(min(as.numeric(shiftrange)))
  b_ins <- abs(max(as.numeric(shiftrange)))
  
  range <- c(-b_del:b_ins)
  
  A <- data.frame(indel=as.numeric(shiftrange), p=indel.summary)
  B <- data.frame(indel=range)
  C <- merge(A, B, all=TRUE)   
  
  COL <- "black"
  COL[!C[, "indel"]==0] <- "#1874CD"
  COL[C[, "indel"]==0] <- "#98F5FF"
  
  if(length(range)>1){
  bp <- barplot(as.vector(C[,"p"]), 
                main=paste("time series", j, "sample", i),
                col=COL, 
                border = COL,
                names.arg=range,
                xlab="type of indel",
                ylim=c(0, max(as.vector(C[,"p"]), na.rm=TRUE)+10), 
                ylab="% of sequences")
  
  #above each group of bars: show percentage (mean across four bases)
  text(bp[C[,"p"]>5], (C[,"p"]+5)[C[,"p"]>5], as.character(((round(C[,"p"],1))[C[,"p"]>5])))
  
  #display Rsq values as an indication of the accuracy:
  eff <- round((100) - as.vector( C[,"p"][C[, "indel"]==0]),1)

  dl <- (C[,"indel"]<0)
  insert <- (C[,"indel"]>0)
  deletions <- sum(as.vector(C[dl,"p"]), na.rm=TRUE)
  insertions <- sum(as.vector(C[insert,"p"]), na.rm=TRUE)
  

  legend("topleft", legend=c(paste("total eff. =", eff, "%"), paste("total del =", deletions, "%"), paste("total ins =", insertions, "%"),   paste("total reads =", Z_order[[j]][i] )), bty="n")
  
  mut.summary <- data.frame(percentage = round(indel.summary,1))
  rownames(mut.summary) <- shiftrange
  rownames(mut.summary)[which(shiftrange>0)]=paste('+',rownames(mut.summary)[which(shiftrange>0)],sep='')
  
  }
  }

}

###################################################
#Indel composition
###################################################

indel_composition <- function(X_order, V_order, Z_order, j, indel, rg1, rg2){
  ## indel= size of indel you want to inspect e.g. 1 insertion or -7 deletion
  ## rg1 = start of region of all reads with chosen indel that you want to see composition of
  ## rg2 = end of region of all reads with chosen indel that you want to see composition of

for (i in 1:length(V_order[[j]])) { 
  y <- V_order[[j]][[i]]
  shiftrange <- rownames(y)

  A <- data.frame(indel=as.numeric(shiftrange), p=as.vector(y))
  dl <-  A[(which(A["indel"]==indel)),'p']    
    
    if(length(dl)>0){
      y2 <- X_order[[j]][[i]][,4][(which(X_order[[j]][[i]][,3]==indel))]
      y3 <- substr(y2,rg1,rg2)
      y4 <- DNAStringSet(y3)
      y5 <- consensusMatrix(y4, baseOnly=TRUE)
      
      y6 <- matrix(NA, nrow=5, ncol=21)
      for(k in 1:21){
        y6[,k] <- y5[,k]/sum(y5[,k])
      }
      rownames(y6) <- c("A", "C", "G", "T", "other")
      y6 <- y6[-5,]
      
      par(mar=c(5.5, 4.5, 15.5, 4.5))
      seqLogo(y6)
      
      par(mar=c(5.1, 4.1, 4.1, 8.1), xpd=TRUE)
      barplot(y6, col=c(3,4,1,2), main=paste('time series', j, "sample", i, "indel=" , indel))
      legend("topright",inset=c(-0.2,0), legend=c("A", "C", "G", "T"), bty="n", pch=15, col=c(3,4,1,2))
    legend("bottomleft", inset=c(0,-0.2), legend=c(paste("total reads =", length(y2) )), bty="n")
      
    } else { 
      stop}
}
}

Plot timeseries

## Warning in df2$tot_read[df2$type == "ins"]/((Z_order[[j]]) - notcl2):
## longer object length is not a multiple of shorter object length

## Warning in if (!is.na(mutations_c_tot)) {: the condition has length > 1 and
## only the first element will be used
## Warning in if (!is.na(mutations_c_tot)) {: the condition has length > 1 and
## only the first element will be used

Gompertz

##plot example gompertz fit of timeseries 1
example1 <- gompertzfit(time_all, wildtype_tot_LBR2[,1], mutations_tot_LBR2[,1])

##############################
#normalize to untransfected fraction
##############################

###without inhibitor
ts_norm_wt_1 <- ts_norm_1$wildtype4[,-c(4,6,8,10)]
ts_norm_indel_1 <- ts_norm_1$mutations3[,-c(4,6,8,10)]

ts_norm_wt_1b <- ts_norm_wt_1[-nrow(ts_norm_wt_1),];
ts_norm_indel_1b <- ts_norm_indel_1[-nrow(ts_norm_indel_1),];

#controls
ts_norm_indel_1c <- ts_norm_1$mutations3c[,-c(4,6,8,10)]

####with inhibitor
ts_norm_wt_1N <- ts_norm_1$wildtype4[,c(4,6,8,10)]
ts_norm_indel_1N <- ts_norm_1$mutations3[,c(4,6,8,10)]

ts_norm_wt_1Nb <- ts_norm_wt_1N[-nrow(ts_norm_wt_1N),];
ts_norm_indel_1Nb <- ts_norm_indel_1N[-nrow(ts_norm_indel_1N),];

#average
gompertz_av(time_all=time_all, time_c_all=time_c_all, wildtype_tot=ts_norm_wt_1, mutations_tot=ts_norm_indel_1, mutations_c_tot=ts_norm_indel_1c)

#individual
###without inhibitor
gomp_ind <- sapply((1:ncol(ts_norm_wt_1b)), function(idx){gompertzfit(time_all=time_all, wildtype_tot=ts_norm_wt_1b[,idx], mutations_tot=ts_norm_indel_1b[,idx])})

gom_D_tA <- gomp_ind[1,]  
gom_D_tB <- gomp_ind[2,]
gom_D_tC <- gomp_ind[3,]

####with inhibitor
gomp_indN <- sapply((1:ncol(ts_norm_wt_1Nb)), function(idx){gompertzfit(time_all=time_all, wildtype_tot=ts_norm_wt_1Nb[,idx], mutations_tot=ts_norm_indel_1Nb[,idx])})

gom_N_tA <- gomp_indN[1,]  
gom_N_tB <- gomp_indN[2,]
gom_N_tC <- gomp_indN[3,]


###############################
#normalize to total indels
###############################
###without inhibitor
ts_norm_wt_2 <- ts_norm_2$wildtype4[,-c(4,6,8,10)]
ts_norm_indel_2 <- ts_norm_2$mutations3[,-c(4,6,8,10)]
#controls
ts_norm_indel_2c <- ts_norm_2$mutations3c[,-c(4,6,8,10)]

####with inhibitor
ts_norm_wt_2N <- ts_norm_2$wildtype4[,c(4,6,8,10)]
ts_norm_indel_2N <- ts_norm_2$mutations3[,c(4,6,8,10)]

#average
gompertz_av(time_all=time_all, time_c_all=time_c_all, wildtype_tot=ts_norm_wt_2, mutations_tot=ts_norm_indel_2, mutations_c_tot=ts_norm_indel_2c)

#individual
###without inhibitor
gomp_ind_2 <- sapply((1:ncol(ts_norm_wt_1b)), function(idx){gompertzfit(time_all=time_all, wildtype_tot=ts_norm_wt_2[,idx], mutations_tot=ts_norm_indel_2[,idx])})

####with inhibitor
gomp_ind_2N <- sapply((1:ncol(ts_norm_wt_1Nb)), function(idx){gompertzfit(time_all=time_all, wildtype_tot=ts_norm_wt_2N[,idx], mutations_tot=ts_norm_indel_2N[,idx])})

#############

wildtype_tot_LBR2b = wildtype_tot_LBR2[-nrow(wildtype_tot_LBR2),];
mutations_tot_LBR2b = mutations_tot_LBR2[-nrow(mutations_tot_LBR2),];
mutations_tot_LBR2_cb = mutations_tot_LBR2_c[-nrow(mutations_tot_LBR2_c),];

#only without inhibitor
wildtype_tot_b <- wildtype_tot_LBR2b[,-c(4,6,8,10)]
mutations_tot_b <- mutations_tot_LBR2b[,-c(4,6,8,10)]
mutations_c_tot_b <- mutations_tot_LBR2_cb[,-c(4,6,8,10)]

wildtype2 <- cbind(wildtype_tot_b, mean=rowMeans(wildtype_tot_b))
mutations2 <- cbind(mutations_tot_b, mean=rowMeans(mutations_tot_b))
mutations2c <- cbind(mutations_c_tot_b, mean=rowMeans(mutations_c_tot_b))

jt <- c(1,2,3,5,7,25,29,36)

#determine factor to normalize all timeseries
norm_m <- 100
norm2_m <- (norm_m/(as.matrix(mutations_tot_b)[nrow(mutations2),]))

#indels
mutations3 <- matrix(NA, nrow=length(time_all), ncol=length(jt))
for (q in 1:length(jt)){
  mutations3[,q] <- as.matrix(mutations_tot_b)[,q]*norm2_m[q]
}

rownames(mutations3)<- as.character(c(time_all))
colnames(mutations3)<- paste("index", jt)


mutations3c <- matrix(NA, nrow=length(time_c_all), ncol=length(jt))
for (q in 1:length(jt)){
  mutations3c[,q] <- as.matrix(mutations_c_tot_b)[,q]*norm2_m[q]
}

rownames(mutations3c)<- as.character(c(time_c_all))
colnames(mutations3c)<- paste("index", jt)

wildtype4 <- matrix(NA, nrow=length(time_all), ncol=length(jt))
for (q in 1:length(jt)){
  wildtype4[,q] <- 100-mutations3[,q]
}
rownames(wildtype4)<-as.character(c(time_all))
colnames(wildtype4)<- paste("index", jt)

mutations_tot_av <- rowMeans(mutations3[1:length(time_all),])
wildtype_tot_av <- rowMeans(wildtype4[1:length(time_all),])
mutations_c_tot_av <- rowMeans(mutations3c[1:length(time_c_all),], na.rm = T)
time_c2 <- time_c_all[-7]

sd_m <- var_m <- sd_w <- var_w <- sd_mc <- var_mc <- NA
if ( length(wildtype_tot_b[1,])>1){
  sd_w <- apply(wildtype4[(1:length(time_all)),], 1, sd, na.rm = T)
  var_w <- apply(wildtype4[(1:length(time_all)),], 1, var, na.rm = T)
  sd_m <- apply(mutations3[(1:length(time_all)),], 1, sd, na.rm = T)
  var_m <- apply(mutations3[(1:length(time_all)),], 1, var, na.rm = T)
  sd_mc <- apply(mutations3c[(1:(length(time_c_all))),], 1, sd, na.rm = T)
  var_mc <- apply(mutations3c[(1:(length(time_c_all))),], 1, var, na.rm = T)
}

mutations4 <- data.frame(time=time_all, mean=mutations_tot_av, sd=sd_m)
wildtype5 <- data.frame(time=time_all, mean=wildtype_tot_av, sd=sd_w)
mutations4_c <- data.frame(time=time_c_all, mean=mutations_c_tot_av, sd=sd_mc)

# fit untransfected fraction
# sigmoidal function for wildtype_gompertz
gompertz = function(params, x) {
  1-(params[1] * exp(-params[2] * exp(-params[3]*x)))
}

# sigmoidal function for indels_gompertz
gompertz_indels = function(params, x) {
  (params[1] * exp(-params[2] * exp(-params[3]*x)))
}

df7 <- data.frame(time=time_all, wt=wildtype_tot_av[(1:length(time_all))],indel=mutations_tot_av[(1:length(time_all))])
df6c <- data.frame(time=time_c_all, indel=mutations_c_tot_av)

# fitting untransfected
x=df7[,1];
y=df7[,2];
z=df7[,3];
l=df6c[,1]
k=df6c[,2]

y <- y/100
z <- z/100
k <- k/100

fitmodel <- nls(y~(1-(a * exp(-b * exp(-c * x)))), start=list(a=20,b=10,c=0.1))

U = 1-coef(fitmodel)[1];

fitmodel_i <- nls(z~(a * exp(-b * exp(-c * x))), start=list(a=20,b=10,c=0.1))

sm <- rbind(coef(fitmodel), coef(fitmodel_i))
cf <- coef(fitmodel_i);

#plot
plot(mutations4$time, mutations4$mean, type="n", ylim=c(0,100), xlim=c(0,60), xlab="time (h)", ylab="fraction in pool (%)", main="LBR2")
par(fg=3) 
with (
  data = mutations4
  , expr = errbar(time, mean, mean+sd, mean-sd, add=T, pch=20, cap=.01, col=3)
)

lines(seq(0,max(df7[,1]),0.1),gompertz_indels(coef(fitmodel_i),seq(0,max(df7[,1]),0.1))*100,lwd=2, col=3)

lines(wildtype5$time, wildtype5$mean, type="n")
par(fg=2) 
with (
  data = wildtype5
  , expr = errbar(time, mean, mean+sd, mean-sd, add=T, pch=20, cap=.01, col=2)
)

lines(seq(0,max(df7[,1]),0.1),gompertz(coef(fitmodel),seq(0,max(df7[,1]),0.1))*100,lwd=2, col=2)


lines(mutations4_c$time, mutations4_c$mean, type="n")
par(fg="grey") 
with (
  data = mutations4_c
  , expr = errbar(time, mean, mean+sd, mean-sd, add=T, pch=20, cap=.01, col="gray15")
)

lines(df6c[c(1,2,3,5,6),1],df6c[c(1,2,3,5,6),2],col="grey15")


par(fg=1) 
legend("topright", legend=c("intact", "indel +shield", "indel -shield", paste("n =", ncol(mutations_tot_b)) ), pch=16, col=c(2, 3, "gray15", NA), bty="n")

#############
#LBR8
#############

wildtype_tot_b = wildtype_tot_LBR8[-nrow(wildtype_tot_LBR8),];
mutations_tot_b = mutations_tot_LBR8[-nrow(mutations_tot_LBR8),];
mutations_c_tot_b = mutations_tot_LBR8_c[-nrow(mutations_tot_LBR8_c),];

wildtype2 <- cbind(wildtype_tot_b, mean=rowMeans(wildtype_tot_b))
mutations2 <- cbind(mutations_tot_b, mean=rowMeans(mutations_tot_b))
mutations2c <- cbind(mutations_c_tot_b, mean=rowMeans(mutations_c_tot_b))

jt <- jt_2

#determine factor to normalize all timeseries
norm_m <- 100
norm2_m <- (norm_m/(as.matrix(mutations_tot_b)[nrow(mutations2),]))

#indels
mutations3 <- matrix(NA, nrow=length(time_all), ncol=length(jt))
for (q in 1:length(jt)){
  mutations3[,q] <- as.matrix(mutations_tot_b)[,q]*norm2_m[q]
}

rownames(mutations3)<- as.character(c(time_all))
colnames(mutations3)<- paste("index", jt)


mutations3c <- matrix(NA, nrow=length(time_c_all), ncol=length(jt))
for (q in 1:length(jt)){
  mutations3c[,q] <- as.matrix(mutations_c_tot_b)[,q]*norm2_m[q]
}

rownames(mutations3c)<- as.character(c(time_c_all))
colnames(mutations3c)<- paste("index", jt)

wildtype4 <- matrix(NA, nrow=length(time_all), ncol=length(jt))
for (q in 1:length(jt)){
  wildtype4[,q] <- 100-mutations3[,q]
}
rownames(wildtype4)<-as.character(c(time_all))
colnames(wildtype4)<- paste("index", jt)

mutations_tot_av <- rowMeans(mutations3[1:length(time_all),], na.rm = T)
wildtype_tot_av <- rowMeans(wildtype4[1:length(time_all),], na.rm = T)
mutations_c_tot_av <- rowMeans(mutations3c[1:length(time_c_all),], na.rm = T)
time_c2 <- time_c_all[-7]

sd_m <- var_m <- sd_w <- var_w <- sd_mc <- var_mc <- NA
if ( length(wildtype_tot_b[1,])>1){
  sd_w <- apply(wildtype4[(1:length(time_all)),], 1, sd, na.rm = T)
  var_w <- apply(wildtype4[(1:length(time_all)),], 1, var, na.rm = T)
  sd_m <- apply(mutations3[(1:length(time_all)),], 1, sd, na.rm = T)
  var_m <- apply(mutations3[(1:length(time_all)),], 1, var, na.rm = T)
  sd_mc <- apply(mutations3c[(1:(length(time_c_all))),], 1, sd, na.rm = T)
  var_mc <- apply(mutations3c[(1:(length(time_c_all))),], 1, var, na.rm = T)
}

mutations4 <- data.frame(time=time_all, mean=mutations_tot_av, sd=sd_m)
wildtype5 <- data.frame(time=time_all, mean=wildtype_tot_av, sd=sd_w)
mutations4_c <- data.frame(time=time_c_all, mean=mutations_c_tot_av, sd=sd_mc)

# fit untransfected fraction
# sigmoidal function for wildtype_gompertz
gompertz = function(params, x) {
  1-(params[1] * exp(-params[2] * exp(-params[3]*x)))
}

# sigmoidal function for indels_gompertz
gompertz_indels = function(params, x) {
  (params[1] * exp(-params[2] * exp(-params[3]*x)))
}

df7 <- data.frame(time=time_all, wt=wildtype_tot_av[(1:length(time_all))],indel=mutations_tot_av[(1:length(time_all))])
df6c <- data.frame(time=time_c_all, indel=mutations_c_tot_av)

# fitting untransfected
x=df7[,1];
y=df7[,2];
z=df7[,3];
l=df6c[,1]
k=df6c[,2]

y <- y/100
z <- z/100
k <- k/100

fitmodel <- nls(y~(1-(a * exp(-b * exp(-c * x)))), start=list(a=20,b=10,c=0.1))

U = 1-coef(fitmodel)[1];

fitmodel_i <- nls(z~(a * exp(-b * exp(-c * x))), start=list(a=20,b=10,c=0.1))

sm <- rbind(coef(fitmodel), coef(fitmodel_i))
cf <- coef(fitmodel_i);

#plot
plot(mutations4$time, mutations4$mean, type="n", ylim=c(0,100), xlim=c(0,60), xlab="time (h)", ylab="fraction in pool (%)", main="LBR8")
par(fg=3) 
with (
  data = mutations4
  , expr = errbar(time, mean, mean+sd, mean-sd, add=T, pch=20, cap=.01, col=3)
)

lines(seq(0,max(df7[,1]),0.1),gompertz_indels(coef(fitmodel_i),seq(0,max(df7[,1]),0.1))*100,lwd=2, col=3)

lines(wildtype5$time, wildtype5$mean, type="n")
par(fg=2) 
with (
  data = wildtype5
  , expr = errbar(time, mean, mean+sd, mean-sd, add=T, pch=20, cap=.01, col=2)
)

lines(seq(0,max(df7[,1]),0.1),gompertz(coef(fitmodel),seq(0,max(df7[,1]),0.1))*100,lwd=2, col=2)


lines(mutations4_c$time, mutations4_c$mean, type="n")
par(fg="grey") 
with (
  data = mutations4_c
  , expr = errbar(time, mean, mean+sd, mean-sd, add=T, pch=20, cap=.01, col="gray15")
)

lines(df6c[c(1,2,3,5,6),1],df6c[c(1,2,3,5,6),2],col="grey15")


par(fg=1) 
legend("topright", legend=c("intact", "indel +shield", "indel -shield", paste("n =", ncol(mutations_tot_b)) ), pch=16, col=c(2, 3, "gray15", NA), bty="n")

#############
#AAVS1
#############

wildtype_tot_b = wildtype_tot_AAVS1[-nrow(wildtype_tot_AAVS1),];
mutations_tot_b = mutations_tot_AAVS1[-nrow(mutations_tot_AAVS1),];
mutations_c_tot_b = mutations_tot_AAVS1_c[-nrow(mutations_tot_AAVS1_c),];

wildtype2 <- cbind(wildtype_tot_b, mean=rowMeans(wildtype_tot_b, na.rm = T))
mutations2 <- cbind(mutations_tot_b, mean=rowMeans(mutations_tot_b, na.rm = T))
mutations2c <- cbind(mutations_c_tot_b, mean=rowMeans(mutations_c_tot_b, na.rm = T))

jt <- jt_3

#determine factor to normalize all timeseries
norm_m <- 100
norm2_m <- (norm_m/(as.matrix(mutations_tot_b)[nrow(mutations2),]))

#indels
mutations3 <- matrix(NA, nrow=length(time_all), ncol=length(jt))
for (q in 1:length(jt)){
  mutations3[,q] <- as.matrix(mutations_tot_b)[,q]*norm2_m[q]
}

rownames(mutations3)<- as.character(c(time_all))
colnames(mutations3)<- paste("index", jt)


mutations3c <- matrix(NA, nrow=length(time_c_all), ncol=length(jt))
for (q in 1:length(jt)){
  mutations3c[,q] <- as.matrix(mutations_c_tot_b)[,q]*norm2_m[q]
}

rownames(mutations3c)<- as.character(c(time_c_all))
colnames(mutations3c)<- paste("index", jt)

wildtype4 <- matrix(NA, nrow=length(time_all), ncol=length(jt))
for (q in 1:length(jt)){
  wildtype4[,q] <- 100-mutations3[,q]
}
rownames(wildtype4)<-as.character(c(time_all))
colnames(wildtype4)<- paste("index", jt)

mutations_tot_av <- rowMeans(mutations3[1:length(time_all),], na.rm = T)
wildtype_tot_av <- rowMeans(wildtype4[1:length(time_all),], na.rm = T)
mutations_c_tot_av <- rowMeans(mutations3c[1:length(time_c_all),], na.rm = T)
time_c2 <- time_c_all[-7]

sd_m <- var_m <- sd_w <- var_w <- sd_mc <- var_mc <- NA
if ( length(wildtype_tot_b[1,])>1){
  sd_w <- apply(wildtype4[(1:length(time_all)),], 1, sd, na.rm = T)
  var_w <- apply(wildtype4[(1:length(time_all)),], 1, var, na.rm = T)
  sd_m <- apply(mutations3[(1:length(time_all)),], 1, sd, na.rm = T)
  var_m <- apply(mutations3[(1:length(time_all)),], 1, var, na.rm = T)
  sd_mc <- apply(mutations3c[(1:(length(time_c_all))),], 1, sd, na.rm = T)
  var_mc <- apply(mutations3c[(1:(length(time_c_all))),], 1, var, na.rm = T)
}

mutations4 <- data.frame(time=time_all, mean=mutations_tot_av, sd=sd_m)
wildtype5 <- data.frame(time=time_all, mean=wildtype_tot_av, sd=sd_w)
mutations4_c <- data.frame(time=time_c_all, mean=mutations_c_tot_av, sd=sd_mc)

# fit untransfected fraction
# sigmoidal function for wildtype_gompertz
gompertz = function(params, x) {
  1-(params[1] * exp(-params[2] * exp(-params[3]*x)))
}

# sigmoidal function for indels_gompertz
gompertz_indels = function(params, x) {
  (params[1] * exp(-params[2] * exp(-params[3]*x)))
}

df7 <- data.frame(time=time_all, wt=wildtype_tot_av[(1:length(time_all))],indel=mutations_tot_av[(1:length(time_all))])
df6c <- data.frame(time=time_c_all, indel=mutations_c_tot_av)

# fitting untransfected
x=df7[,1];
y=df7[,2];
z=df7[,3];
l=df6c[,1]
k=df6c[,2]

y <- y/100
z <- z/100
k <- k/100

fitmodel <- nls(y~(1-(a * exp(-b * exp(-c * x)))), start=list(a=20,b=10,c=0.1))

U = 1-coef(fitmodel)[1];

fitmodel_i <- nls(z~(a * exp(-b * exp(-c * x))), start=list(a=20,b=10,c=0.1))

sm <- rbind(coef(fitmodel), coef(fitmodel_i))
cf <- coef(fitmodel_i);

#plot
plot(mutations4$time, mutations4$mean, type="n", ylim=c(0,100), xlim=c(0,60), xlab="time (h)", ylab="fraction in pool (%)", main="AAVS1")
par(fg=3) 
with (
  data = mutations4
  , expr = errbar(time, mean, mean+sd, mean-sd, add=T, pch=20, cap=.01, col=3)
)

lines(seq(0,max(df7[,1]),0.1),gompertz_indels(coef(fitmodel_i),seq(0,max(df7[,1]),0.1))*100,lwd=2, col=3)

lines(wildtype5$time, wildtype5$mean, type="n")
par(fg=2) 
with (
  data = wildtype5
  , expr = errbar(time, mean, mean+sd, mean-sd, add=T, pch=20, cap=.01, col=2)
)

lines(seq(0,max(df7[,1]),0.1),gompertz(coef(fitmodel),seq(0,max(df7[,1]),0.1))*100,lwd=2, col=2)


lines(mutations4_c$time, mutations4_c$mean, type="n")
par(fg="grey") 
with (
  data = mutations4_c
  , expr = errbar(time, mean, mean+sd, mean-sd, add=T, pch=20, cap=.01, col="gray15")
)

lines(df6c[c(1,2,3,5,6),1],df6c[c(1,2,3,5,6),2],col="grey15")


par(fg=1) 
legend("topright", legend=c("intact", "indel +shield", "indel -shield", paste("n =", ncol(mutations_tot_b)) ), pch=16, col=c(2, 3, "gray15", NA), bty="n")

#############
#chr11
#############

wildtype_tot_b = wildtype_tot_chr11[-nrow(wildtype_tot_chr11),];
mutations_tot_b = mutations_tot_chr11[-nrow(mutations_tot_chr11),];
mutations_c_tot_b = mutations_tot_chr11_c[-nrow(mutations_tot_chr11_c),];

wildtype2 <- cbind(wildtype_tot_b, mean=rowMeans(wildtype_tot_b, na.rm = T))
mutations2 <- cbind(mutations_tot_b, mean=rowMeans(mutations_tot_b, na.rm = T))
mutations2c <- cbind(mutations_c_tot_b, mean=rowMeans(mutations_c_tot_b, na.rm = T))

jt <- jt_4

#determine factor to normalize all timeseries
norm_m <- 100
norm2_m <- (norm_m/(as.matrix(mutations_tot_b)[nrow(mutations2),]))

#indels
mutations3 <- matrix(NA, nrow=length(time_all), ncol=length(jt))
for (q in 1:length(jt)){
  mutations3[,q] <- as.matrix(mutations_tot_b)[,q]*norm2_m[q]
}

rownames(mutations3)<- as.character(c(time_all))
colnames(mutations3)<- paste("index", jt)


mutations3c <- matrix(NA, nrow=length(time_c_all), ncol=length(jt))
for (q in 1:length(jt)){
  mutations3c[,q] <- as.matrix(mutations_c_tot_b)[,q]*norm2_m[q]
}

rownames(mutations3c)<- as.character(c(time_c_all))
colnames(mutations3c)<- paste("index", jt)

wildtype4 <- matrix(NA, nrow=length(time_all), ncol=length(jt))
for (q in 1:length(jt)){
  wildtype4[,q] <- 100-mutations3[,q]
}
rownames(wildtype4)<-as.character(c(time_all))
colnames(wildtype4)<- paste("index", jt)

mutations_tot_av <- rowMeans(mutations3[1:length(time_all),], na.rm = T)
wildtype_tot_av <- rowMeans(wildtype4[1:length(time_all),], na.rm = T)
mutations_c_tot_av <- rowMeans(mutations3c[1:length(time_c_all),], na.rm = T)
time_c2 <- time_c_all[-7]

sd_m <- var_m <- sd_w <- var_w <- sd_mc <- var_mc <- NA
if ( length(wildtype_tot_b[1,])>1){
  sd_w <- apply(wildtype4[(1:length(time_all)),], 1, sd, na.rm = T)
  var_w <- apply(wildtype4[(1:length(time_all)),], 1, var, na.rm = T)
  sd_m <- apply(mutations3[(1:length(time_all)),], 1, sd, na.rm = T)
  var_m <- apply(mutations3[(1:length(time_all)),], 1, var, na.rm = T)
  sd_mc <- apply(mutations3c[(1:(length(time_c_all))),], 1, sd, na.rm = T)
  var_mc <- apply(mutations3c[(1:(length(time_c_all))),], 1, var, na.rm = T)
}

mutations4 <- data.frame(time=time_all, mean=mutations_tot_av, sd=sd_m)
wildtype5 <- data.frame(time=time_all, mean=wildtype_tot_av, sd=sd_w)
mutations4_c <- data.frame(time=time_c_all, mean=mutations_c_tot_av, sd=sd_mc)

# fit untransfected fraction
# sigmoidal function for wildtype_gompertz
gompertz = function(params, x) {
  1-(params[1] * exp(-params[2] * exp(-params[3]*x)))
}

# sigmoidal function for indels_gompertz
gompertz_indels = function(params, x) {
  (params[1] * exp(-params[2] * exp(-params[3]*x)))
}

df7 <- data.frame(time=time_all, wt=wildtype_tot_av[(1:length(time_all))],indel=mutations_tot_av[(1:length(time_all))])
df6c <- data.frame(time=time_c_all, indel=mutations_c_tot_av)

# fitting untransfected
x=df7[,1];
y=df7[,2];
z=df7[,3];
l=df6c[,1]
k=df6c[,2]

y <- y/100
z <- z/100
k <- k/100

fitmodel <- nls(y~(1-(a * exp(-b * exp(-c * x)))), start=list(a=20,b=10,c=0.1))

U = 1-coef(fitmodel)[1];

fitmodel_i <- nls(z~(a * exp(-b * exp(-c * x))), start=list(a=20,b=10,c=0.1))

sm <- rbind(coef(fitmodel), coef(fitmodel_i))
cf <- coef(fitmodel_i);

#plot
plot(mutations4$time, mutations4$mean, type="n", ylim=c(0,100), xlim=c(0,60), xlab="time (h)", ylab="fraction in pool (%)", main="chr11")
par(fg=3) 
with (
  data = mutations4
  , expr = errbar(time, mean, mean+sd, mean-sd, add=T, pch=20, cap=.01, col=3)
)

lines(seq(0,max(df7[,1]),0.1),gompertz_indels(coef(fitmodel_i),seq(0,max(df7[,1]),0.1))*100,lwd=2, col=3)

lines(wildtype5$time, wildtype5$mean, type="n")
par(fg=2) 
with (
  data = wildtype5
  , expr = errbar(time, mean, mean+sd, mean-sd, add=T, pch=20, cap=.01, col=2)
)

lines(seq(0,max(df7[,1]),0.1),gompertz(coef(fitmodel),seq(0,max(df7[,1]),0.1))*100,lwd=2, col=2)


lines(mutations4_c$time, mutations4_c$mean, type="n")
par(fg="grey") 
with (
  data = mutations4_c
  , expr = errbar(time, mean, mean+sd, mean-sd, add=T, pch=20, cap=.01, col="gray15")
)

lines(df6c[c(1,2,3,5,6),1],df6c[c(1,2,3,5,6),2],col="grey15")


par(fg=1) 
legend("topright", legend=c("intact", "indel +shield", "indel -shield", paste("n =", ncol(mutations_tot_b)) ), pch=16, col=c(2, 3, "gray15", NA), bty="n")

TIDE data

#run functions
TIDE_ts <- timeseriesplots_TIDE(q1=q1, q2=q2, j = j, time_all = time_all)
mutations_tot_TIDE_LBR2 <- TIDE_ts$mutations_tot
wildtype_tot_TIDE_LBR2 <- TIDE_ts$wildtype_tot

#normalized
TIDE_ts_av <- timeseriesplots_TIDE_norm(j = j, time_all = time_all, wildtype_tot = TIDE_ts$wildtype_tot, mutations_tot = TIDE_ts$mutations_tot)
mutations3_TIDE_LBR2 <- TIDE_ts_av$mutations3
wildtype4_TIDE_LBR2 <- TIDE_ts_av$wildtype4

#+1 vs -7
TIDE_indels_ts <- timeseriesplots_indel_TIDE(time_all=time_all, j=j, q1=q1, qi2=qi2, qi3=qi3, qi4=qi4)

Specific indels

ts_i_1 <- timeseriesplots_indel(V_order = V_order, Z_order = Z_order, jt_1, time_all = time_all)
#ts_i_5 <- timeseriesplots_indel(V_order = V_order, Z_order = Z_order, jt_5, time_all = time_long)

insertion1_tot_LBR2 <- ts_i_1$insertion1_tot
deletion7_tot_LBR2 <- ts_i_1$deletion7_tot
intact_tot_LBR2 <- ts_i_1$intact_tot
mut3_tot_LBR2 <- ts_i_1$mut3_tot

#example of timeseries 1 of +1 insertion and -7 deletion
example1 <- gompertzfit_Mutants(time=time_all, wt= intact_tot_LBR2[,1], plus1 = insertion1_tot_LBR2[,1], minus7 = deletion7_tot_LBR2[,1], othermut = mut3_tot_LBR2[,1])

#####################################
#normalized to untransfected fraction
######################################

#total
ts_i_norm_1 <- timeseriesplots_indel_norm(jt = jt_1, time_all = time_all, insertion1_tot = ts_i_1$insertion1_tot, deletion7_tot = ts_i_1$deletion7_tot, mut3_tot=ts_i_1$mut3_tot, intact_tot=ts_i_1$intact_tot, wildtype_tot=ts_1$wildtype_tot, norm2_m=ts_norm_1$norm2_m, norm_w=ts_norm_1$norm_w)


###without inhibitor
ts_norm_intact <- ts_i_norm_1$intact2[,-c(4,6,8,10)]
ts_norm_plus1 <- ts_i_norm_1$insertion1c[,-c(4,6,8,10)]
ts_norm_min7 <- ts_i_norm_1$deletion7c[,-c(4,6,8,10)]
ts_norm_mut <- ts_i_norm_1$mut3c[,-c(4,6,8,10)]

ts_norm_intactb <- ts_norm_intact[-nrow(ts_norm_intact),];
ts_norm_plus1b <- ts_norm_plus1[-nrow(ts_norm_plus1),];
ts_norm_min7b <- ts_norm_min7[-nrow(ts_norm_min7),];
ts_norm_mutb <- ts_norm_mut[-nrow(ts_norm_mut),];


####with inhibitor
ts_norm_intactN <- ts_i_norm_1$intact2[,c(4,6,8,10)]
ts_norm_plus1N <- ts_i_norm_1$insertion1c[,c(4,6,8,10)]
ts_norm_min7N <- ts_i_norm_1$deletion7c[,c(4,6,8,10)]
ts_norm_mutN <- ts_i_norm_1$mut3c[,c(4,6,8,10)]

ts_norm_intactNb <- ts_norm_intactN[-nrow(ts_norm_intactN),];
ts_norm_plus1Nb <- ts_norm_plus1N[-nrow(ts_norm_plus1N),];
ts_norm_min7Nb <- ts_norm_min7N[-nrow(ts_norm_min7N),];
ts_norm_mutNb <- ts_norm_mutN[-nrow(ts_norm_mutN),];


#individual
###without inhibitor
gomp_i_ind <- sapply((1:ncol(ts_norm_intact)), function(idx){gompertzfit_Mutants(time_all=time_all, wt=ts_norm_intactb[,idx], plus1=ts_norm_plus1b[,idx], minus7 = ts_norm_min7b[,idx], othermut = ts_norm_mutb[,idx])})

gom_D_1A <- gomp_i_ind[1,]  
gom_D_1B <- gomp_i_ind[4,]
gom_D_1C <- gomp_i_ind[7,]

gom_D_7A <- gomp_i_ind[2,]  
gom_D_7B <- gomp_i_ind[5,]
gom_D_7C <- gomp_i_ind[8,]

gom_D_oA <- gomp_i_ind[3,] 
gom_D_oB <- gomp_i_ind[6,] 
gom_D_oC <- gomp_i_ind[9,] 

####with inhibitor
gomp_i_indN <- sapply((1:ncol(ts_norm_intactNb)), function(idx){gompertzfit_Mutants(time_all=time_all, wt=ts_norm_intactNb[,idx], plus1=ts_norm_plus1Nb[,idx], minus7 = ts_norm_min7Nb[,idx], othermut = ts_norm_mutNb[,idx])})

gom_N_1A <- gomp_i_indN[1,]  
gom_N_1B <- gomp_i_indN[4,]
gom_N_1C <- gomp_i_indN[7,]

gom_N_7A <- gomp_i_indN[2,]  
gom_N_7B <- gomp_i_indN[5,]
gom_N_7C <- gomp_i_indN[8,]

gom_N_oA <- gomp_i_indN[3,]
gom_N_oB <- gomp_i_indN[6,]
gom_N_oC <- gomp_i_indN[9,]


#####################################
#normalized to total indel
######################################

#total
ts_i_norm_2 <- timeseriesplots_indel_norm(jt = jt_1, time_all = time_all, insertion1_tot = ts_i_1$insertion1_tot, deletion7_tot = ts_i_1$deletion7_tot, mut3_tot=ts_i_1$mut3_tot, intact_tot=ts_i_1$intact_tot, wildtype_tot=ts_1$wildtype_tot, norm2_m=ts_norm_2$norm2_m, norm_w=ts_norm_1$norm_w)

Specific indels - DMSO vs NU7441

##########################################
##normalized to untransfected fraction
##########################################
gom_D_A <- matrix(c(gom_D_tA, gom_D_1A, gom_D_7A, gom_D_oA), nrow =length(gom_D_tA), ncol=4)
gom_N_A <-  matrix(c(gom_N_tA, gom_N_1A, gom_N_7A, gom_N_oA), nrow =length(gom_N_tA), ncol=4)
error_DA <- c(sd(gom_D_A[,1]), sd(gom_D_A[,2]), sd(gom_D_A[,3]), sd(gom_D_A[,4]))
error_NA <- c(sd(gom_N_A[,1]), sd(gom_N_A[,2]), sd(gom_N_A[,3]), sd(gom_N_A[,4]))

gom_A <- data.frame(gomDA=colMeans(gom_D_A), gomNA=colMeans(gom_N_A))
gom_A_er <- data.frame(gomDA=error_DA, gomNA=error_NA)

df <- data.frame(bar = colMeans(gom_D_A)*100, bar2 = colMeans(gom_N_A)*100,
                 error = error_DA*100,  error2 = error_NA*100)

df2 <- as.matrix(t(df[,1:2]))
df3 <- as.matrix(t(df[,3:4]))

foo <- barplot(df2, ylim=c(0,100),border=NA, beside=TRUE, ylab="asymptote", names=c("total", "+1", "-7", "other mutations"))
arrows(x0=foo,y0=df2+df3 ,y1=df2-df3 ,angle=90,code=3,length=0.1)

##########################################
##normalized to total indels
##########################################

normD <- 1/gom_D_A[,1]
gom_D_A_norm <- gom_D_A*normD
normN <- 1/gom_N_A[,1]
gom_N_A_norm <- gom_N_A*normN
error_DA_norm <- c(sd(gom_D_A_norm[,1]), sd(gom_D_A_norm[,2]), sd(gom_D_A_norm[,3]), sd(gom_D_A_norm[,4]))
error_NA_norm <- c(sd(gom_N_A_norm[,1]), sd(gom_N_A_norm[,2]), sd(gom_N_A_norm[,3]), sd(gom_N_A_norm[,4]))

gom_A <- data.frame(gomDA=colMeans(gom_D_A_norm), gomNA=colMeans(gom_N_A_norm))
gom_A_er <- data.frame(gomDA=error_DA_norm, gomNA=error_NA_norm)

df <- data.frame(bar = colMeans(gom_D_A_norm)*100, bar2 = colMeans(gom_N_A_norm)*100,
                 error = error_DA_norm*100,  error2 = error_NA_norm*100)

df2 <- as.matrix(t(df[,1:2]))
df3 <- as.matrix(t(df[,3:4]))

foo <- barplot(df2, ylim=c(0,100),border=NA, beside=TRUE, ylab="asymptote", names=c("total", "+1", "-7", "other mutations"))
arrows(x0=foo,y0=df2+df3 ,y1=df2-df3 ,angle=90,code=3,length=0.1)
## Warning in arrows(x0 = foo, y0 = df2 + df3, y1 = df2 - df3, angle = 90, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x0 = foo, y0 = df2 + df3, y1 = df2 - df3, angle = 90, :
## zero-length arrow is of indeterminate angle and so skipped

##
t.test(gom_D_A_norm[,2],gom_N_A_norm[,2])
## 
##  Welch Two Sample t-test
## 
## data:  gom_D_A_norm[, 2] and gom_N_A_norm[, 2]
## t = 10.123, df = 4.1772, p-value = 0.0004293
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.2486772 0.4324001
## sample estimates:
## mean of x mean of y 
## 0.5873228 0.2467841
t.test(gom_D_A_norm[,3],gom_N_A_norm[,3])
## 
##  Welch Two Sample t-test
## 
## data:  gom_D_A_norm[, 3] and gom_N_A_norm[, 3]
## t = -9.8271, df = 3.5197, p-value = 0.001108
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.4007347 -0.2165373
## sample estimates:
## mean of x mean of y 
## 0.1824336 0.4910696
t.test(gom_D_A_norm[,4],gom_N_A_norm[,4])
## 
##  Welch Two Sample t-test
## 
## data:  gom_D_A_norm[, 4] and gom_N_A_norm[, 4]
## t = -3.6555, df = 8.1772, p-value = 0.006206
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.05412667 -0.01234903
## sample estimates:
## mean of x mean of y 
## 0.2301931 0.2634309

Additional IR

#TIDE data

tot_mut_IR_a <- c(73.2, 78.1, 81.6)
plus1_IR_a <- c(56.8, 59.1, 63.7)
min7_IR_a <- c(11.1, 13.1, 13.4)
tot_IR_a <- c(97,96,95.8)

tot_mut_10gy_a <- c(75.4, 76.7, 77.8)
plus1_10gy_a <- c(65.4, 63.1, 69.3)
min7_10gy_a <- c(5.6, 5.6, 5)
tot_10gy_a <- c(98,94.2,95.9)


##########################################
##normalized to for experiments
##########################################

##
tot_mut_IR <- (tot_mut_IR_a/tot_mut_IR_a)*tot_mut_IR_a[1]
plus1_IR <- (tot_mut_IR_a[1]/tot_mut_IR_a)*plus1_IR_a
min7_IR <-  (tot_mut_IR_a[1]/tot_mut_IR_a)*min7_IR_a

tot_mut_10gy <- (tot_mut_10gy_a/tot_mut_IR_a)*tot_mut_IR_a[1]
#tot_mut_10gy <- (tot_mut_IR_a[1]/tot_mut_IR_a)*tot_mut_10gy_a
plus1_10gy <- (tot_mut_IR_a[1]/tot_mut_IR_a)*plus1_10gy_a
min7_10gy <- (tot_mut_IR_a[1]/tot_mut_IR_a)*min7_10gy_a

##
tot_mut_10gy <- (tot_mut_IR[1]/tot_mut_10gy)*tot_mut_10gy
plus1_10gy <- (tot_mut_IR[1]/tot_mut_10gy)*plus1_10gy
min7_10gy <- (tot_mut_IR[1]/tot_mut_10gy)*min7_10gy

##
plus1 <- c(mean(plus1_IR), mean(plus1_10gy))
tot_m <- c(mean(tot_mut_IR), mean(tot_mut_10gy))
min7 <- c(mean(min7_IR), mean(min7_10gy))

sdplus1 <- c(sd(plus1_IR), sd(plus1_10gy))
sdwt <- c(sd(tot_mut_IR), sd(tot_mut_10gy))
sdmin7 <- c(sd(min7_IR), sd(min7_10gy)) 

IR <- matrix(c(tot_m, plus1, min7), ncol=3, nrow=2)
errorIR <- matrix(c(sdwt, sdplus1, sdmin7), ncol=3, nrow=2)

df <- data.frame(bar = IR[1,], bar2 = IR[2,],
                 error = errorIR[1,],  error2 = errorIR[2,])

df2 <- as.matrix(t(df[,1:2]))
df3 <- as.matrix(t(df[,3:4]))

foo <- barplot(df2, ylim=c(0,100),border=NA, beside=TRUE, ylab="indel/(intact+indel)", names=c("total", "+1", "-7"))
arrows(x0=foo,y0=df2+df3 ,y1=df2-df3 ,angle=90,code=3,length=0.1)
## Warning in arrows(x0 = foo, y0 = df2 + df3, y1 = df2 - df3, angle = 90, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x0 = foo, y0 = df2 + df3, y1 = df2 - df3, angle = 90, :
## zero-length arrow is of indeterminate angle and so skipped

##########################################
##normalized to total indels
##########################################

factor <- 100/tot_mut_IR_a
factor2 <- 100/tot_mut_10gy_a

tot_mut_IR <- tot_mut_IR_a*factor
plus1_IR <- plus1_IR_a*factor
min7_IR <-  min7_IR_a*factor

tot_mut_10gy <- tot_mut_10gy_a*factor2
plus1_10gy <- plus1_10gy_a*factor2
min7_10gy <- min7_10gy_a*factor2

##
t.test(plus1_IR,plus1_10gy)
## 
##  Welch Two Sample t-test
## 
## data:  plus1_IR and plus1_10gy
## t = -4.1931, df = 2.5278, p-value = 0.03438
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -16.45824  -1.37440
## sample estimates:
## mean of x mean of y 
##  77.11052  86.02684
t.test(min7_IR,min7_10gy)
## 
##  Welch Two Sample t-test
## 
## data:  min7_IR and min7_10gy
## t = 15.607, df = 3.4152, p-value = 0.0002697
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   7.339759 10.796178
## sample estimates:
## mean of x mean of y 
## 16.119624  7.051655
##
plus1 <- c(mean(plus1_IR), mean(plus1_10gy))
tot_m <- c(mean(tot_mut_IR), mean(tot_mut_10gy))
min7 <- c(mean(min7_IR), mean(min7_10gy))

sdplus1 <- c(sd(plus1_IR), sd(plus1_10gy))
sdwt <- c(sd(tot_mut_IR), sd(tot_mut_10gy))
sdmin7 <- c(sd(min7_IR), sd(min7_10gy)) 

IR <- matrix(c(tot_m, plus1, min7), ncol=3, nrow=2)
errorIR <- matrix(c(sdwt, sdplus1, sdmin7), ncol=3, nrow=2)

df <- data.frame(bar = IR[1,], bar2 = IR[2,],
                 error = errorIR[1,],  error2 = errorIR[2,])

df2 <- as.matrix(t(df[,1:2]))
df3 <- as.matrix(t(df[,3:4]))

foo <- barplot(df2, ylim=c(0,100),border=NA, beside=TRUE, ylab="indel/(intact+indel)", names=c("total", "+1", "-7"))
arrows(x0=foo,y0=df2+df3 ,y1=df2-df3 ,angle=90,code=3,length=0.1)
## Warning in arrows(x0 = foo, y0 = df2 + df3, y1 = df2 - df3, angle = 90, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x0 = foo, y0 = df2 + df3, y1 = df2 - df3, angle = 90, :
## zero-length arrow is of indeterminate angle and so skipped

Indel spectrum and composition

#example for LBR2 time series 1
is_1 <- indel_spectrum(V_order = V_order, Z_order = Z_order, j=1)

#example for LBR2 time series 1
#score=0 
ic0_1 <- indel_composition(X_order=X_order, V_order = V_order, Z_order = Z_order, j=1, indel=0, rg1=70, rg2=90)

#score=+1
ic1_1 <- indel_composition(X_order=X_order, V_order = V_order, Z_order = Z_order, j=1, indel=1, rg1=70, rg2=90)

#score=-7
ic7_1 <- indel_composition(X_order=X_order, V_order = V_order, Z_order = Z_order, j=1, indel=-7, rg1=70, rg2=90)

Export data

write.table(wildtype_tot_LBR2, "wildtype_tot_LBR2.txt", row.names = T, col.names = T, sep="\t")
write.table(wildtype_tot_LBR8, "wildtype_tot_LBR8.txt", row.names = T, col.names = T, sep="\t")
write.table(wildtype_tot_AAVS1, "wildtype_tot_AAVS1.txt", row.names = T, col.names = T, sep="\t")
write.table(wildtype_tot_chr11, "wildtype_tot_chr11.txt", row.names = T, col.names = T, sep="\t")

write.table(insertion1_tot_LBR2, "insertion1_LBR2.txt", row.names = T, col.names = T, sep="\t")
write.table(deletion7_tot_LBR2, "deletion7_LBR2.txt", row.names = T, col.names = T, sep="\t")
write.table(intact_tot_LBR2, "intact_LBR2.txt", row.names = T, col.names = T, sep="\t")
write.table(mut3_tot_LBR2, "othermut_LBR2.txt", row.names = T, col.names = T, sep="\t")

SessionInfo

sessionInfo()
## R version 3.3.2 (2016-10-31)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 7 x64 (build 7601) Service Pack 1
## 
## locale:
## [1] LC_COLLATE=Dutch_Netherlands.1252  LC_CTYPE=Dutch_Netherlands.1252   
## [3] LC_MONETARY=Dutch_Netherlands.1252 LC_NUMERIC=C                      
## [5] LC_TIME=Dutch_Netherlands.1252    
## 
## attached base packages:
##  [1] grid      stats4    parallel  stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] Hmisc_4.0-2         ggplot2_2.2.1       Formula_1.2-1      
##  [4] survival_2.40-1     lattice_0.20-34     seqLogo_1.40.0     
##  [7] Biostrings_2.42.1   XVector_0.14.0      IRanges_2.8.1      
## [10] S4Vectors_0.12.1    BiocGenerics_0.20.0 FME_1.3.5          
## [13] coda_0.19-1         rootSolve_1.7       minpack.lm_1.2-1   
## [16] deSolve_1.14        plyr_1.8.4         
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.8         RColorBrewer_1.1-2  base64enc_0.1-3    
##  [4] tools_3.3.2         zlibbioc_1.20.0     rpart_4.1-10       
##  [7] digest_0.6.11       checkmate_1.8.2     htmlTable_1.8      
## [10] evaluate_0.10       tibble_1.2          gtable_0.2.0       
## [13] Matrix_1.2-7.1      yaml_2.1.14         gridExtra_2.2.1    
## [16] cluster_2.0.5       stringr_1.1.0       knitr_1.20         
## [19] nnet_7.3-12         rprojroot_1.2       data.table_1.10.0  
## [22] foreign_0.8-67      rmarkdown_1.5       latticeExtra_0.6-28
## [25] minqa_1.2.4         magrittr_1.5        backports_1.0.5    
## [28] scales_0.4.1        htmltools_0.3.5     MASS_7.3-45        
## [31] splines_3.3.2       assertthat_0.1      colorspace_1.3-2   
## [34] stringi_1.1.2       acepack_1.4.1       lazyeval_0.2.0     
## [37] munsell_0.4.3